1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_inline_t.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65
66 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
67
68 /*------------------------------------------------------------------*/
69 /* Routines for dead node elimination / copying garbage collection  */
70 /* of the obstack.                                                  */
71 /*------------------------------------------------------------------*/
72
73 /**
74  * Remember the new node in the old node by using a field all nodes have.
75  */
76 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
77
78 /**
79  * Get this new node, before the old node is forgotten.
80  */
81 #define get_new_node(oldn) get_irn_link(oldn)
82
83 /**
84  * Check if a new node was set.
85  */
86 #define has_new_node(n) (get_new_node(n) != NULL)
87
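/* Illustrative sketch (not part of the original file): the old->new mapping kept
 * in the link field is the core of the copying scheme below.  A copier first
 * creates the new node, records it in the old one, and later resolves
 * predecessors through that link:
 *
 *     ir_node *nn = new_ir_node(...);          // copy of n on the new obstack
 *     set_new_node(n, nn);                     // remember it in n's link field
 *     ...
 *     ir_node *pred = get_irn_n(n, i);         // old predecessor
 *     set_irn_n(nn, i, get_new_node(pred));    // rewire to the predecessor's copy
 */
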
88 /**
89  * We use the block_visited flag to mark that we have computed the
90  * number of useful predecessors for this block.
91  * Further we encode the new arity in this flag in the old blocks.
92  * Remembering the arity is useful, as it saves a lot of pointer
93  * accesses.  This function is called for all Phi and Block nodes
94  * in a Block.
95  */
96 static inline int
97 compute_new_arity(ir_node *b) {
98         int i, res, irn_arity;
99         int irg_v, block_v;
100
101         irg_v = get_irg_block_visited(current_ir_graph);
102         block_v = get_Block_block_visited(b);
103         if (block_v >= irg_v) {
104                 /* we computed the number of preds for this block and saved it in the
105                    block_v flag */
106                 return block_v - irg_v;
107         } else {
108                 /* compute the number of good predecessors */
109                 res = irn_arity = get_irn_arity(b);
110                 for (i = 0; i < irn_arity; i++)
111                         if (is_Bad(get_irn_n(b, i))) res--;
112                 /* save it in the flag. */
113                 set_Block_block_visited(b, irg_v + res);
114                 return res;
115         }
116 }
117
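/* Worked example (illustrative, not in the original file): assume
 * get_irg_block_visited(irg) == 100 and block B has the predecessors
 * (Jmp, Bad, Jmp).  The first call computes res = 3 - 1 = 2 and stores
 * 100 + 2 in B's block_visited flag; every later call for B (e.g. one per
 * Phi of the block) simply returns 102 - 100 = 2 without rescanning the
 * in array.
 */
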
118 /**
119  * Copies the node to the new obstack. The Ins of the new node point to
120  * the predecessors on the old obstack.  For block/phi nodes not all
121  * predecessors might be copied.  n->link points to the new node.
122  * For Phi and Block nodes the function allocates in-arrays with an arity
123  * only for useful predecessors.  The arity is determined by counting
124  * the non-bad predecessors of the block.
125  *
126  * @param n    The node to be copied
127  * @param env  if non-NULL, the node number attribute will be copied to the new node
128  *
129  * Note: Also used for loop unrolling.
130  */
131 static void copy_node(ir_node *n, void *env) {
132         ir_node *nn, *block;
133         int new_arity;
134         ir_op *op = get_irn_op(n);
135         (void) env;
136
137         if (op == op_Bad) {
138                 /* node copied already */
139                 return;
140         } else if (op == op_Block) {
141                 block = NULL;
142                 new_arity = compute_new_arity(n);
143                 n->attr.block.graph_arr = NULL;
144         } else {
145                 block = get_nodes_block(n);
146                 if (op == op_Phi) {
147                         new_arity = compute_new_arity(block);
148                 } else {
149                         new_arity = get_irn_arity(n);
150                 }
151         }
152         nn = new_ir_node(get_irn_dbg_info(n),
153                 current_ir_graph,
154                 block,
155                 op,
156                 get_irn_mode(n),
157                 new_arity,
158                 get_irn_in(n) + 1);
159         /* Copy the attributes.  These might point to additional data.  If this
160            was allocated on the old obstack the pointers now are dangling.  This
161            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
162         if (op == op_Block) {
163                 /* we cannot allow blocks WITHOUT macroblock input */
164                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
165         }
166         copy_node_attr(n, nn);
167
168         if (env != NULL) {
169                 /* for easier debugging, we want to copy the node numbers too */
170                 nn->node_nr = n->node_nr;
171         }
172
173         set_new_node(n, nn);
174         hook_dead_node_elim_subst(current_ir_graph, n, nn);
175 }
176
177 /**
178  * Copies new predecessors of old node to new node remembered in link.
179  * Spare the Bad predecessors of Phi and Block nodes.
180  */
181 static void copy_preds(ir_node *n, void *env) {
182         ir_node *nn, *block;
183         int i, j, irn_arity;
184         (void) env;
185
186         nn = get_new_node(n);
187
188         if (is_Block(n)) {
189                 /* copy the macro block header */
190                 ir_node *mbh = get_Block_MacroBlock(n);
191
192                 if (mbh == n) {
193                         /* this block is a macroblock header */
194                         set_Block_MacroBlock(nn, nn);
195                 } else {
196                         /* get the macro block header */
197                         ir_node *nmbh = get_new_node(mbh);
198                         assert(nmbh != NULL);
199                         set_Block_MacroBlock(nn, nmbh);
200                 }
201
202                 /* Don't copy Bad nodes. */
203                 j = 0;
204                 irn_arity = get_irn_arity(n);
205                 for (i = 0; i < irn_arity; i++) {
206                         if (! is_Bad(get_irn_n(n, i))) {
207                                 ir_node *pred = get_irn_n(n, i);
208                                 set_irn_n(nn, j, get_new_node(pred));
209                                 j++;
210                         }
211                 }
212                 /* repair the block visited flag from above misuse. Repair it in both
213                    graphs so that the old one can still be used. */
214                 set_Block_block_visited(nn, 0);
215                 set_Block_block_visited(n, 0);
216                 /* Local optimization could not merge two subsequent blocks if
217                    the in array contained Bads.  Now it is possible.
218                    We don't call optimize_in_place as it requires
219                    that the fields in ir_graph are set properly. */
220                 if (!has_Block_entity(nn) &&
221                     get_opt_control_flow_straightening() &&
222                     get_Block_n_cfgpreds(nn) == 1 &&
223                     is_Jmp(get_Block_cfgpred(nn, 0))) {
224                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
225                         if (nn == old) {
226                                 /* Jmp jumps into the block it is in -- handle this self-cycle. */
227                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
228                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
229                         } else {
230                                 exchange(nn, old);
231                         }
232                 }
233         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
234                 /* Don't copy a Phi input if the corresponding predecessor of the block is Bad.
235                    The Block itself should not be Bad. */
236                 block = get_nodes_block(n);
237                 set_nodes_block(nn, get_new_node(block));
238                 j = 0;
239                 irn_arity = get_irn_arity(n);
240                 for (i = 0; i < irn_arity; i++) {
241                         if (! is_Bad(get_irn_n(block, i))) {
242                                 ir_node *pred = get_irn_n(n, i);
243                                 set_irn_n(nn, j, get_new_node(pred));
244                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
245                                 j++;
246                         }
247                 }
248                 /* If the pre walker reached this Phi after the post walker visited the
249                    block, block_visited is > 0. */
250                 set_Block_block_visited(get_nodes_block(n), 0);
251                 /* Compacting the Phi's ins might generate Phis with only one
252                    predecessor. */
253                 if (get_irn_arity(nn) == 1)
254                         exchange(nn, get_irn_n(nn, 0));
255         } else {
256                 irn_arity = get_irn_arity(n);
257                 for (i = -1; i < irn_arity; i++)
258                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
259         }
260         /* Now the new node is complete.  We can add it to the hash table for CSE.
261            @@@ inlining aborts if we identify End. Why? */
262         if (!is_End(nn))
263                 add_identities(current_ir_graph->value_table, nn);
264 }
265
266 /**
267  * Copies the graph recursively, compacts the keep-alives of the end node.
268  *
269  * @param irg           the graph to be copied
270  * @param copy_node_nr  If non-zero, the node number will be copied
271  */
272 static void copy_graph(ir_graph *irg, int copy_node_nr) {
273         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
274         ir_node *ka;      /* keep alive */
275         int i, irn_arity;
276         unsigned long vfl;
277
278         /* Some nodes must be copied by hand, sigh */
279         vfl = get_irg_visited(irg);
280         set_irg_visited(irg, vfl + 1);
281
282         oe = get_irg_end(irg);
283         mark_irn_visited(oe);
284         /* copy the end node by hand, allocate dynamic in array! */
285         ne = new_ir_node(get_irn_dbg_info(oe),
286                 irg,
287                 NULL,
288                 op_End,
289                 mode_X,
290                 -1,
291                 NULL);
292         /* Copy the attributes.  Well, there might be some in the future... */
293         copy_node_attr(oe, ne);
294         set_new_node(oe, ne);
295
296         /* copy the Bad node */
297         ob = get_irg_bad(irg);
298         mark_irn_visited(ob);
299         nb = new_ir_node(get_irn_dbg_info(ob),
300                 irg,
301                 NULL,
302                 op_Bad,
303                 mode_T,
304                 0,
305                 NULL);
306         copy_node_attr(ob, nb);
307         set_new_node(ob, nb);
308
309         /* copy the NoMem node */
310         om = get_irg_no_mem(irg);
311         mark_irn_visited(om);
312         nm = new_ir_node(get_irn_dbg_info(om),
313                 irg,
314                 NULL,
315                 op_NoMem,
316                 mode_M,
317                 0,
318                 NULL);
319         copy_node_attr(om, nm);
320         set_new_node(om, nm);
321
322         /* copy the live nodes */
323         set_irg_visited(irg, vfl);
324         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
325
326         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
327
328         /* visit the anchors as well */
329         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
330                 ir_node *n = get_irg_anchor(irg, i);
331
332                 if (n && (get_irn_visited(n) <= vfl)) {
333                         set_irg_visited(irg, vfl);
334                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
335                 }
336         }
337
338         /* copy_preds for the end node ... */
339         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
340
341         /*- ... and now the keep alives. -*/
342         /* First pick the unmarked block nodes and walk them.  We must pick these
343            first, otherwise we will overlook blocks reachable from Phis. */
344         irn_arity = get_End_n_keepalives(oe);
345         for (i = 0; i < irn_arity; i++) {
346                 ka = get_End_keepalive(oe, i);
347                 if (is_Block(ka)) {
348                         if (get_irn_visited(ka) <= vfl) {
349                                 /* We must keep the block alive and copy everything reachable */
350                                 set_irg_visited(irg, vfl);
351                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
352                         }
353                         add_End_keepalive(ne, get_new_node(ka));
354                 }
355         }
356
357         /* Now pick other nodes.  Here we will keep all! */
358         irn_arity = get_End_n_keepalives(oe);
359         for (i = 0; i < irn_arity; i++) {
360                 ka = get_End_keepalive(oe, i);
361                 if (!is_Block(ka)) {
362                         if (get_irn_visited(ka) <= vfl) {
363                                 /* We didn't copy the node yet.  */
364                                 set_irg_visited(irg, vfl);
365                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
366                         }
367                         add_End_keepalive(ne, get_new_node(ka));
368                 }
369         }
370
371         /* start block sometimes only reached after keep alives */
372         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
373         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
374 }
375
376 /**
377  * Copies the graph reachable from current_ir_graph->end to the obstack
378  * in current_ir_graph and fixes the environment.
379  * Then fixes the fields in current_ir_graph containing nodes of the
380  * graph.
381  *
382  * @param copy_node_nr  If non-zero, the node number will be copied
383  */
384 static void
385 copy_graph_env(int copy_node_nr) {
386         ir_graph *irg = current_ir_graph;
387         ir_node *old_end, *new_anchor;
388         int i;
389
390         /* remove end_except and end_reg nodes */
391         old_end = get_irg_end(irg);
392         set_irg_end_except (irg, old_end);
393         set_irg_end_reg    (irg, old_end);
394
395         /* Not all nodes remembered in irg might be reachable
396            from the end node.  Ensure their link is set to NULL, so that
397            we can test whether new nodes have been computed. */
398         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
399                 ir_node *n = get_irg_anchor(irg, i);
400                 if (n != NULL)
401                         set_new_node(n, NULL);
402         }
403         /* we use the block visited flag for removing Bads from the Blocks' in arrays. */
404         inc_irg_block_visited(irg);
405
406         /* copy the graph */
407         copy_graph(irg, copy_node_nr);
408
409         /* fix the anchor */
410         old_end    = get_irg_end(irg);
411         new_anchor = new_Anchor(irg);
412
413         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
414                 ir_node *n = get_irg_anchor(irg, i);
415                 if (n)
416                         set_irn_n(new_anchor, i, get_new_node(n));
417         }
418         free_End(old_end);
419         irg->anchor = new_anchor;
420
421         /* ensure the new anchor is placed in the endblock */
422         set_nodes_block(new_anchor, get_irg_end_block(irg));
423 }
424
425 /**
426  * Copies all reachable nodes to a new obstack.  Removes bad inputs
427  * from block nodes and the corresponding inputs from Phi nodes.
428  * Merges single exit blocks with single entry blocks and removes
429  * 1-input Phis.
430  * Adds all new nodes to a new hash table for CSE.  Does not
431  * perform CSE, so the hash table might contain common subexpressions.
432  */
433 void dead_node_elimination(ir_graph *irg) {
434         ir_graph *rem;
435 #ifdef INTERPROCEDURAL_VIEW
436         int rem_ipview = get_interprocedural_view();
437 #endif
438         struct obstack *graveyard_obst = NULL;
439         struct obstack *rebirth_obst   = NULL;
440
441         edges_deactivate(irg);
442
443         /* inform statistics that we started a dead-node elimination run */
444         hook_dead_node_elim(irg, 1);
445
446         /* Remember external state of current_ir_graph. */
447         rem = current_ir_graph;
448         current_ir_graph = irg;
449 #ifdef INTERPROCEDURAL_VIEW
450         set_interprocedural_view(0);
451 #endif
452
453         assert(get_irg_phase_state(irg) != phase_building);
454
455         /* Handle graph state */
456         free_callee_info(irg);
457         free_irg_outs(irg);
458         free_trouts();
459
460         /* @@@ so far we lose loops when copying */
461         free_loop_information(irg);
462
463         set_irg_doms_inconsistent(irg);
464
465         /* A quiet place, where the old obstack can rest in peace,
466            until it will be cremated. */
467         graveyard_obst = irg->obst;
468
469         /* A new obstack, where the reachable nodes will be copied to. */
470         rebirth_obst = XMALLOC(struct obstack);
471         irg->obst = rebirth_obst;
472         obstack_init(irg->obst);
473         irg->last_node_idx = 0;
474
475         /* We also need a new value table for CSE */
476         del_identities(irg->value_table);
477         irg->value_table = new_identities();
478
479         /* Copy the graph from the old to the new obstack */
480         copy_graph_env(/*copy_node_nr=*/1);
481
482         /* Free memory from old unoptimized obstack */
483         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
484         xfree(graveyard_obst);            /* ... then free it.           */
485
486         /* inform statistics that the run is over */
487         hook_dead_node_elim(irg, 0);
488
489         current_ir_graph = rem;
490 #ifdef INTERPROCEDURAL_VIEW
491         set_interprocedural_view(rem_ipview);
492 #endif
493 }
494
495 ir_graph_pass_t *dead_node_elimination_pass(const char *name) {
496         return def_graph_pass(name ? name : "dce", dead_node_elimination);
497 }
498
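/* Usage sketch (illustrative; assumes an ir_graph *irg obtained elsewhere):
 * dead node elimination can be run directly or wrapped as a pass:
 *
 *     dead_node_elimination(irg);                              // run once, in place
 *
 *     ir_graph_pass_t *p = dead_node_elimination_pass(NULL);   // pass named "dce"
 *     // ... add p to a graph pass manager and run it on every graph ...
 */
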
499 /**
500  * Relink bad predecessors of a block and store the old in array in the
501  * link field. This function is called by relink_bad_predecessors().
502  * The array stored in the link field starts with the block operand at position 0.
503  * If the block has bad predecessors, create a new in array without the bad preds.
504  * Otherwise leave the in array untouched.
505  */
506 static void relink_bad_block_predecessors(ir_node *n, void *env) {
507         ir_node **new_in, *irn;
508         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
509         (void) env;
510
511         /* if link field of block is NULL, look for bad predecessors otherwise
512            this is already done */
513         if (is_Block(n) && get_irn_link(n) == NULL) {
514                 /* save old predecessors in link field (position 0 is the block operand)*/
515                 set_irn_link(n, get_irn_in(n));
516
517                 /* count predecessors without bad nodes */
518                 old_irn_arity = get_irn_arity(n);
519                 for (i = 0; i < old_irn_arity; i++)
520                         if (!is_Bad(get_irn_n(n, i)))
521                                 ++new_irn_arity;
522
523                 /* arity changing: set new predecessors without bad nodes */
524                 if (new_irn_arity < old_irn_arity) {
525                         /* Get new predecessor array. We do not resize the array, as we must
526                            keep the old one to update Phis. */
527                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
528
529                         /* set new predecessors in array */
530                         new_in[0] = NULL;
531                         new_irn_n = 1;
532                         for (i = 0; i < old_irn_arity; i++) {
533                                 irn = get_irn_n(n, i);
534                                 if (!is_Bad(irn)) {
535                                         new_in[new_irn_n] = irn;
536                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
537                                         ++new_irn_n;
538                                 }
539                         }
540                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
541                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
542                         n->in = new_in;
543                 } /* ir node has bad predecessors */
544         } /* Block is not relinked */
545 }
546
547 /**
548  * Relinks Bad predecessors of Blocks and Phis; called by the walker in
549  * remove_bad_predecessors(). If n is a Block, call
550  * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
551  * function for the Phi's Block. If this block has bad predecessors, relink the
552  * predecessors of the Phi node.
553  */
554 static void relink_bad_predecessors(ir_node *n, void *env) {
555         ir_node *block, **old_in;
556         int i, old_irn_arity, new_irn_arity;
557
558         /* relink bad predecessors of a block */
559         if (is_Block(n))
560                 relink_bad_block_predecessors(n, env);
561
562         /* If Phi node relink its block and its predecessors */
563         if (is_Phi(n)) {
564                 /* Relink predecessors of phi's block */
565                 block = get_nodes_block(n);
566                 if (get_irn_link(block) == NULL)
567                         relink_bad_block_predecessors(block, env);
568
569                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
570                 old_irn_arity = ARR_LEN(old_in);
571
572                 /* Relink Phi predecessors if count of predecessors changed */
573                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
574                         /* set new predecessors in array
575                            n->in[0] remains the same block */
576                         new_irn_arity = 1;
577                         for (i = 1; i < old_irn_arity; i++)
578                                 if (!is_Bad(old_in[i])) {
579                                         n->in[new_irn_arity] = n->in[i];
580                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
581                                         ++new_irn_arity;
582                                 }
583
584                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
585                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
586                 }
587         } /* n is a Phi node */
588 }
589
590 /*
591  * Removes Bad predecessors from Blocks and the corresponding
592  * inputs to Phi nodes as in dead_node_elimination but without
593  * copying the graph.
594  * While walking down, set the link field to NULL; while walking up, call
595  * relink_bad_predecessors() (this function stores the old in array
596  * in the link field and sets a new in array if the arity of the predecessors
597  * changes).
598  */
599 void remove_bad_predecessors(ir_graph *irg) {
600         panic("Fix backedge handling first");
601         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
602 }
603
604
605 /*
606    __                      _  __ __
607   (_     __    o     _    | \/  |_
608   __)|_| | \_/ | \_/(/_   |_/\__|__
609
610   The following stuff implements a facility that automatically patches
611   registered ir_node pointers to the new node when a dead node elimination occurs.
612 */
613
614 struct _survive_dce_t {
615         struct obstack obst;
616         pmap *places;
617         pmap *new_places;
618         hook_entry_t dead_node_elim;
619         hook_entry_t dead_node_elim_subst;
620 };
621
622 typedef struct _survive_dce_list_t {
623         struct _survive_dce_list_t *next;
624         ir_node **place;
625 } survive_dce_list_t;
626
627 static void dead_node_hook(void *context, ir_graph *irg, int start) {
628         survive_dce_t *sd = context;
629         (void) irg;
630
631         /* Create a new map before the dead node elimination is performed. */
632         if (start) {
633                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
634         } else {
635                 /* Dead node elimination is over: the patched places are in new_places now; drop the old map. */
636                 pmap_destroy(sd->places);
637                 sd->places     = sd->new_places;
638                 sd->new_places = NULL;
639         }
640 }
641
642 /**
643  * Hook called when dead node elimination replaces old by nw.
644  */
645 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
646         survive_dce_t *sd = context;
647         survive_dce_list_t *list = pmap_get(sd->places, old);
648         (void) irg;
649
650         /* If the node is to be patched back, write the new address to all registered locations. */
651         if (list) {
652                 survive_dce_list_t *p;
653
654                 for (p = list; p; p = p->next)
655                         *(p->place) = nw;
656
657                 pmap_insert(sd->new_places, nw, list);
658         }
659 }
660
661 /**
662  * Make a new Survive DCE environment.
663  */
664 survive_dce_t *new_survive_dce(void) {
665         survive_dce_t *res = XMALLOC(survive_dce_t);
666         obstack_init(&res->obst);
667         res->places     = pmap_create();
668         res->new_places = NULL;
669
670         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
671         res->dead_node_elim.context                   = res;
672         res->dead_node_elim.next                      = NULL;
673
674         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
675         res->dead_node_elim_subst.context = res;
676         res->dead_node_elim_subst.next    = NULL;
677
678         register_hook(hook_dead_node_elim, &res->dead_node_elim);
679         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
680         return res;
681 }
682
683 /**
684  * Free a Survive DCE environment.
685  */
686 void free_survive_dce(survive_dce_t *sd) {
687         obstack_free(&sd->obst, NULL);
688         pmap_destroy(sd->places);
689         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
690         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
691         xfree(sd);
692 }
693
694 /**
695  * Register a node pointer to be patched upon DCE.
696  * When DCE occurs, the node pointer specified by @p place will be
697  * patched to the new address of the node it is pointing to.
698  *
699  * @param sd    The Survive DCE environment.
700  * @param place The address of the node pointer.
701  */
702 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
703         if (*place != NULL) {
704                 ir_node *irn      = *place;
705                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
706                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
707
708                 nw->next  = curr;
709                 nw->place = place;
710
711                 pmap_insert(sd->places, irn, nw);
712         }
713 }
714
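/* Usage sketch (illustrative, not part of the original file): keeping a node
 * pointer valid across dead node elimination.  'my_node' is a hypothetical
 * pointer some analysis wants to keep:
 *
 *     survive_dce_t *sd = new_survive_dce();
 *     survive_dce_register_irn(sd, &my_node);
 *     dead_node_elimination(irg);    // my_node now points at the copied node
 *     free_survive_dce(sd);
 */
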
715 /*--------------------------------------------------------------------*/
716 /*  Functionality for inlining                                         */
717 /*--------------------------------------------------------------------*/
718
719 /**
720  * Copy node for inlining.  Updates attributes that change when
721  * inlining but not for dead node elimination.
722  *
723  * Copies the node by calling copy_node() and then updates the entity if
724  * it's a local one.  env must be a pointer to the frame type of the
725  * inlined procedure. The new entities must be in the link field of
726  * the entities.
727  */
728 static void copy_node_inline(ir_node *n, void *env) {
729         ir_node *nn;
730         ir_type *frame_tp = (ir_type *)env;
731
732         copy_node(n, NULL);
733         if (is_Sel(n)) {
734                 nn = get_new_node(n);
735                 assert(is_Sel(nn));
736                 /* use copied entities from the new frame */
737                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
738                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
739                 }
740         } else if (is_Block(n)) {
741                 nn = get_new_node(n);
742                 nn->attr.block.irg.irg = current_ir_graph;
743         }
744 }
745
746 /**
747  * Copies new predecessors of the old node and moves constants to
748  * the Start Block.
749  */
750 static void copy_preds_inline(ir_node *n, void *env) {
751         ir_node *nn;
752
753         copy_preds(n, env);
754         nn = skip_Id(get_new_node(n));
755         if (is_irn_constlike(nn)) {
756                 /* move Constants into the start block */
757                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
758
759                 n = identify_remember(current_ir_graph->value_table, nn);
760                 if (nn != n) {
761                         DBG_OPT_CSE(nn, n);
762                         exchange(nn, n);
763                 }
764         }
765 }
766
767 /**
768  * Walker: checks if P_value_arg_base is used.
769  */
770 static void find_addr(ir_node *node, void *env) {
771         int *allow_inline = env;
772         if (is_Sel(node)) {
773                 ir_graph *irg = current_ir_graph;
774                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
775                         /* access to frame */
776                         ir_entity *ent = get_Sel_entity(node);
777                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
778                                 /* access to value_type */
779                                 *allow_inline = 0;
780                         }
781                 }
782         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
783                 /* From GCC:
784                  * Refuse to inline an alloca call unless the user explicitly forced it, as this
785                  * may change the program's memory overhead drastically when the function
786                  * using alloca is called in a loop.  For the GCC source in SPEC2000, inlining
787                  * into schedule_block caused it to require 2GB of RAM instead of 256MB.
788                  *
789                  * Unfortunately this is true for our implementation as well.
790                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
791                  * disables inlining of functions using VLA (which are completely safe).
792                  *
793                  * 2 Solutions:
794                  * - add a flag to the Alloc node for "real" alloca() calls
795                  * - add a new Stack-Restore node at the end of a function using alloca()
796                  */
797                 *allow_inline = 0;
798         }
799 }
800
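/* Illustrative example (hypothetical C source): a callee such as
 *
 *     void f(int n) {
 *         char *buf = alloca(n);   // or a VLA: char buf[n];
 *         ...
 *     }
 *
 * contains a stack Alloc, so find_addr() clears *allow_inline and calls to
 * f() will not be inlined, even though the VLA form would be safe.
 */
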
801 /**
802  * Check if we can inline a given call.
803  * Currently, we cannot inline two cases:
804  * - calls with compound arguments
805  * - graphs that take the address of a parameter
806  *
807  * check these conditions here
808  */
809 static int can_inline(ir_node *call, ir_graph *called_graph) {
810         ir_type *call_type = get_Call_type(call);
811         int params, ress, i, res;
812         assert(is_Method_type(call_type));
813
814         params = get_method_n_params(call_type);
815         ress   = get_method_n_ress(call_type);
816
817         /* check parameters for compound arguments */
818         for (i = 0; i < params; ++i) {
819                 ir_type *p_type = get_method_param_type(call_type, i);
820
821                 if (is_compound_type(p_type))
822                         return 0;
823         }
824
825         /* check results for compound arguments */
826         for (i = 0; i < ress; ++i) {
827                 ir_type *r_type = get_method_res_type(call_type, i);
828
829                 if (is_compound_type(r_type))
830                         return 0;
831         }
832
833         res = 1;
834         irg_walk_graph(called_graph, find_addr, NULL, &res);
835
836         return res;
837 }
838
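/* Illustrative example (hypothetical C declarations): can_inline() rejects
 * calls that pass or return compound values, e.g.
 *
 *     struct pair { int a, b; };
 *     struct pair g(struct pair p);   // compound parameter and result
 *
 * while a call to   int h(int x, double y)   passes both checks and is only
 * rejected if h's graph takes the address of a parameter (see find_addr()).
 */
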
839 enum exc_mode {
840         exc_handler    = 0, /**< There is a handler. */
841         exc_to_end     = 1, /**< Branches to End. */
842         exc_no_handler = 2  /**< Exception handling not represented. */
843 };
844
845 /* Inlines a method at the given call site. */
846 int inline_method(ir_node *call, ir_graph *called_graph) {
847         ir_node             *pre_call;
848         ir_node             *post_call, *post_bl;
849         ir_node             *in[pn_Start_max];
850         ir_node             *end, *end_bl, *block;
851         ir_node             **res_pred;
852         ir_node             **cf_pred;
853         ir_node             **args_in;
854         ir_node             *ret, *phi;
855         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
856         enum exc_mode       exc_handling;
857         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
858         ir_entity           *ent;
859         ir_graph            *rem, *irg;
860         irg_inline_property prop = get_irg_inline_property(called_graph);
861         unsigned long       visited;
862
863         if (prop == irg_inline_forbidden)
864                 return 0;
865
866         ent = get_irg_entity(called_graph);
867
868         mtp = get_entity_type(ent);
869         ctp = get_Call_type(call);
870         n_params = get_method_n_params(mtp);
871         n_res    = get_method_n_ress(mtp);
872         if (n_params > get_method_n_params(ctp)) {
873                 /* this is a bad feature of C: without a prototype, we can
874                  * call a function with fewer parameters than it expects. Currently
875                  * we don't support this, although we could use Unknown then. */
876                 return 0;
877         }
878         if (n_res != get_method_n_ress(ctp)) {
879                 return 0;
880         }
881
882         /* Argh, compiling C has some bad consequences: argument and parameter
883          * types may differ.  It is implementation dependent what happens in that case.
884          * We support inlining, if the bitsize of the types matches AND
885          * the same arithmetic is used. */
886         for (i = n_params - 1; i >= 0; --i) {
887                 ir_type *param_tp = get_method_param_type(mtp, i);
888                 ir_type *arg_tp   = get_method_param_type(ctp, i);
889
890                 if (param_tp != arg_tp) {
891                         ir_mode *pmode = get_type_mode(param_tp);
892                         ir_mode *amode = get_type_mode(arg_tp);
893
894                         if (pmode == NULL || amode == NULL)
895                                 return 0;
896                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
897                                 return 0;
898                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
899                                 return 0;
900                         /* otherwise we can simply "reinterpret" the bits */
901                 }
902         }
903         for (i = n_res - 1; i >= 0; --i) {
904                 ir_type *decl_res_tp = get_method_res_type(mtp, i);
905                 ir_type *used_res_tp = get_method_res_type(ctp, i);
906
907                 if (decl_res_tp != used_res_tp) {
908                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
909                         ir_mode *used_mode = get_type_mode(used_res_tp);
910                         if (decl_mode == NULL || used_mode == NULL)
911                                 return 0;
912                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
913                                 return 0;
914                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
915                                 return 0;
916                         /* otherwise we can "reinterpret" the bits */
917                 }
918         }
919
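        /* Illustrative example (hypothetical): calling   int f(int)   through a
         * declaration   unsigned f(unsigned)   is accepted by the checks above,
         * because both modes have the same bit size and arithmetic, so the bits
         * are simply reinterpreted.  A mismatch such as double vs. long long has
         * equal size but different arithmetic and makes inline_method() return 0. */
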
920         irg = get_irn_irg(call);
921
922         /*
923          * We cannot inline a recursive call. The graph must be copied before
924          * the call to inline_method() using create_irg_copy().
925          */
926         if (called_graph == irg)
927                 return 0;
928
929         /*
930          * currently, we cannot inline two cases:
931          * - call with compound arguments
932          * - graphs that take the address of a parameter
933          */
934         if (! can_inline(call, called_graph))
935                 return 0;
936
937         rem = current_ir_graph;
938         current_ir_graph = irg;
939
940         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
941
942         /* --  Turn off optimizations; they can cause problems when allocating new nodes. -- */
943         rem_opt = get_opt_optimize();
944         set_optimize(0);
945
946         /* Handle graph state */
947         assert(get_irg_phase_state(irg) != phase_building);
948         assert(get_irg_pinned(irg) == op_pin_state_pinned);
949         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
950         set_irg_outs_inconsistent(irg);
951         set_irg_extblk_inconsistent(irg);
952         set_irg_doms_inconsistent(irg);
953         set_irg_loopinfo_inconsistent(irg);
954         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
955         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
956
957         /* -- Check preconditions -- */
958         assert(is_Call(call));
959
960         /* here we know we WILL inline, so inform the statistics */
961         hook_inline(call, called_graph);
962
963         /* -- Decide how to handle exception control flow: Is there a handler
964            for the Call node, or do we branch directly to End on an exception?
965            exc_handling:
966            0 There is a handler.
967            1 Branches to End.
968            2 Exception handling not represented in Firm. -- */
969         {
970                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
971                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
972                         long proj_nr = get_Proj_proj(proj);
973                         if (proj_nr == pn_Call_X_except) Xproj = proj;
974                         if (proj_nr == pn_Call_M_except) Mproj = proj;
975                 }
976                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
977                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
978                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
979         }
980
981         /* create the argument tuple */
982         NEW_ARR_A(ir_node *, args_in, n_params);
983
984         block = get_nodes_block(call);
985         for (i = n_params - 1; i >= 0; --i) {
986                 ir_node *arg      = get_Call_param(call, i);
987                 ir_type *param_tp = get_method_param_type(mtp, i);
988                 ir_mode *mode     = get_type_mode(param_tp);
989
990                 if (mode != get_irn_mode(arg)) {
991                         arg = new_r_Conv(block, arg, mode);
992                 }
993                 args_in[i] = arg;
994         }
995
996         /* --
997            Pre_call collects the values entering the called procedure and later replaces the Start node of the called graph.
998            Post_call is the old Call node and collects the results of the called
999            graph. Both will end up being a tuple.  -- */
1000         post_bl = get_nodes_block(call);
1001         set_irg_current_block(irg, post_bl);
1002         /* XxMxPxPxPxT of Start + parameter of Call */
1003         in[pn_Start_X_initial_exec]   = new_Jmp();
1004         in[pn_Start_M]                = get_Call_mem(call);
1005         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
1006         in[pn_Start_P_tls]            = get_irg_tls(irg);
1007         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
1008         pre_call = new_Tuple(pn_Start_max, in);
1009         post_call = call;
1010
1011         /* --
1012            The new block gets the ins of the old block, pre_call and all its
1013            predecessors and all Phi nodes. -- */
1014         part_block(pre_call);
1015
1016         /* -- Prepare state for dead node elimination -- */
1017         /* Visited flags in calling irg must be >= flag in called irg.
1018            Else walker and arity computation will not work. */
1019         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1020                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1021         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1022                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1023         visited = get_irg_visited(irg);
1024
1025         /* Set pre_call as new Start node in link field of the start node of
1026            calling graph and pre_call's block as new block for the start block
1027            of calling graph.
1028            Further mark these nodes so that they are not visited by the
1029            copying. */
1030         set_irn_link(get_irg_start(called_graph), pre_call);
1031         set_irn_visited(get_irg_start(called_graph), visited);
1032         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1033         set_irn_visited(get_irg_start_block(called_graph), visited);
1034
1035         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1036         set_irn_visited(get_irg_bad(called_graph), visited);
1037
1038         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1039         set_irn_visited(get_irg_no_mem(called_graph), visited);
1040
1041         /* Initialize for compaction of in arrays */
1042         inc_irg_block_visited(irg);
1043
1044         /* -- Replicate local entities of the called_graph -- */
1045         /* copy the entities. */
1046         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
1047         called_frame = get_irg_frame_type(called_graph);
1048         curr_frame   = get_irg_frame_type(irg);
1049         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1050                 ir_entity *new_ent, *old_ent;
1051                 old_ent = get_class_member(called_frame, i);
1052                 new_ent = copy_entity_own(old_ent, curr_frame);
1053                 set_entity_link(old_ent, new_ent);
1054         }
1055
1056         /* visited is now greater than that of the called graph.  With this trick,
1057            visited effectively remains unchanged, so an outer walker (e.g. one searching
1058            for Call nodes to inline) that triggered this inlining will not visit the inlined nodes. */
1059         set_irg_visited(irg, get_irg_visited(irg)-1);
1060
1061         /* -- Performing dead node elimination inlines the graph -- */
1062         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1063            entities. */
1064         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1065                  get_irg_frame_type(called_graph));
1066
1067         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
1068
1069         /* Repair called_graph */
1070         set_irg_visited(called_graph, get_irg_visited(irg));
1071         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1072         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1073
1074         /* -- Merge the end of the inlined procedure with the call site -- */
1075         /* We will turn the old Call node into a Tuple with the following
1076            predecessors:
1077            -1:  Block of Tuple.
1078            0: Phi of all Memories of Return statements.
1079            1: Jmp from new Block that merges the control flow from all exception
1080            predecessors of the old end block.
1081            2: Tuple of all results.
1082            3: Phi of Exception memories.
1083            In case the old Call directly branches to End on an exception we don't
1084            need the block merging all exceptions nor the Phi of the exception
1085            memories.
1086         */
1087
1088         /* -- Precompute some values -- */
1089         end_bl = get_new_node(get_irg_end_block(called_graph));
1090         end = get_new_node(get_irg_end(called_graph));
1091         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1092         n_res = get_method_n_ress(get_Call_type(call));
1093
1094         res_pred = XMALLOCN(ir_node*, n_res);
1095         cf_pred  = XMALLOCN(ir_node*, arity);
1096
1097         set_irg_current_block(irg, post_bl); /* just to make sure */
1098
1099         /* -- preserve keepalives of the inlined graph -- */
1100         irn_arity = get_irn_arity(end);
1101         for (i = 0; i < irn_arity; i++) {
1102                 ir_node *ka = get_End_keepalive(end, i);
1103                 if (! is_Bad(ka))
1104                         add_End_keepalive(get_irg_end(irg), ka);
1105         }
1106
1107         /* The new end node will die.  We need not free as the in array is on the obstack:
1108            copy_node() only generated 'D' arrays. */
1109
1110         /* -- Replace Return nodes by Jump nodes. -- */
1111         n_ret = 0;
1112         for (i = 0; i < arity; i++) {
1113                 ir_node *ret;
1114                 ret = get_Block_cfgpred(end_bl, i);
1115                 if (is_Return(ret)) {
1116                         cf_pred[n_ret] = new_r_Jmp(get_nodes_block(ret));
1117                         n_ret++;
1118                 }
1119         }
1120         set_irn_in(post_bl, n_ret, cf_pred);
1121
1122         /* -- Build a Tuple for all results of the method.
1123            Add Phi node if there was more than one Return.  -- */
1124         turn_into_tuple(post_call, pn_Call_max);
1125         /* First the Memory-Phi */
1126         n_ret = 0;
1127         for (i = 0; i < arity; i++) {
1128                 ret = get_Block_cfgpred(end_bl, i);
1129                 if (is_Return(ret)) {
1130                         cf_pred[n_ret] = get_Return_mem(ret);
1131                         n_ret++;
1132                 }
1133         }
1134         phi = new_Phi(n_ret, cf_pred, mode_M);
1135         set_Tuple_pred(call, pn_Call_M_regular, phi);
1136         /* Conserve Phi-list for further inlinings -- but might be optimized */
1137         if (get_nodes_block(phi) == post_bl) {
1138                 set_irn_link(phi, get_irn_link(post_bl));
1139                 set_irn_link(post_bl, phi);
1140         }
1141         /* Now the real results */
1142         if (n_res > 0) {
1143                 for (j = 0; j < n_res; j++) {
1144                         ir_type *res_type = get_method_res_type(ctp, j);
1145                         ir_mode *res_mode = get_type_mode(res_type);
1146                         n_ret = 0;
1147                         for (i = 0; i < arity; i++) {
1148                                 ret = get_Block_cfgpred(end_bl, i);
1149                                 if (is_Return(ret)) {
1150                                         ir_node *res = get_Return_res(ret, j);
1151                                         if (get_irn_mode(res) != res_mode) {
1152                                                 ir_node *block = get_nodes_block(res);
1153                                                 res = new_r_Conv(block, res, res_mode);
1154                                         }
1155                                         cf_pred[n_ret] = res;
1156                                         n_ret++;
1157                                 }
1158                         }
1159                         if (n_ret > 0)
1160                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1161                         else
1162                                 phi = new_Bad();
1163                         res_pred[j] = phi;
1164                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1165                         if (get_nodes_block(phi) == post_bl) {
1166                                 set_Phi_next(phi, get_Block_phis(post_bl));
1167                                 set_Block_phis(post_bl, phi);
1168                         }
1169                 }
1170                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1171         } else {
1172                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1173         }
1174         /* handle the regular call */
1175         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1176
1177         /* For now, we cannot inline calls with value_base */
1178         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1179
1180         /* Finally the exception control flow.
1181            We have two (three) possible situations:
1182            First if the Call branches to an exception handler: We need to add a Phi node to
1183            collect the memory containing the exception objects.  Further we need
1184            to add another block to get a correct representation of this Phi.  To
1185            this block we add a Jmp that resolves into the X output of the Call
1186            when the Call is turned into a tuple.
1187            Second the Call branches to End, the exception is not handled.  Just
1188            add all inlined exception branches to the End node.
1189            Third: there is no Exception edge at all. Handle as case two. */
1190         if (exc_handling == exc_handler) {
1191                 n_exc = 0;
1192                 for (i = 0; i < arity; i++) {
1193                         ir_node *ret, *irn;
1194                         ret = get_Block_cfgpred(end_bl, i);
1195                         irn = skip_Proj(ret);
1196                         if (is_fragile_op(irn) || is_Raise(irn)) {
1197                                 cf_pred[n_exc] = ret;
1198                                 ++n_exc;
1199                         }
1200                 }
1201                 if (n_exc > 0) {
1202                         ir_node *block = new_Block(n_exc, cf_pred);
1203                         set_cur_block(block);
1204
1205                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1206                         /* The Phi for the memories with the exception objects */
1207                         n_exc = 0;
1208                         for (i = 0; i < arity; i++) {
1209                                 ir_node *ret;
1210                                 ret = skip_Proj(get_Block_cfgpred(end_bl, i));
1211                                 if (is_Call(ret)) {
1212                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 3);
1213                                         n_exc++;
1214                                 } else if (is_fragile_op(ret)) {
1215                                         /* We rely on all cfops having the memory output at the same position. */
1216                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 0);
1217                                         n_exc++;
1218                                 } else if (is_Raise(ret)) {
1219                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 1);
1220                                         n_exc++;
1221                                 }
1222                         }
1223                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1224                 } else {
1225                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1226                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1227                 }
1228         } else {
1229                 ir_node *main_end_bl;
1230                 int main_end_bl_arity;
1231                 ir_node **end_preds;
1232
1233                 /* assert(exc_handling == 1 || no exceptions. ) */
1234                 n_exc = 0;
1235                 for (i = 0; i < arity; i++) {
1236                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1237                         ir_node *irn = skip_Proj(ret);
1238
1239                         if (is_fragile_op(irn) || is_Raise(irn)) {
1240                                 cf_pred[n_exc] = ret;
1241                                 n_exc++;
1242                         }
1243                 }
1244                 main_end_bl       = get_irg_end_block(irg);
1245                 main_end_bl_arity = get_irn_arity(main_end_bl);
1246                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1247
1248                 for (i = 0; i < main_end_bl_arity; ++i)
1249                         end_preds[i] = get_irn_n(main_end_bl, i);
1250                 for (i = 0; i < n_exc; ++i)
1251                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1252                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1253                 set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1254                 set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1255                 free(end_preds);
1256         }
1257         free(res_pred);
1258         free(cf_pred);
1259
1260         /* --  Turn optimizations back on. -- */
1261         set_optimize(rem_opt);
1262         current_ir_graph = rem;
1263
1264         return 1;
1265 }
1266
1267 /********************************************************************/
1268 /* Apply inlining to small methods.                                 */
1269 /********************************************************************/
1270
1271 static struct obstack  temp_obst;
1272
1273 /** Represents a possible inlinable call in a graph. */
1274 typedef struct _call_entry {
1275         ir_node    *call;       /**< The Call node. */
1276         ir_graph   *callee;     /**< The callee IR-graph. */
1277         list_head  list;        /**< List head for linking the next one. */
1278         int        loop_depth;  /**< The loop depth of this call. */
1279         int        benefice;    /**< The calculated benefice of this call. */
1280         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1281         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1282 } call_entry;
1283
1284 /**
1285  * environment for inlining small irgs
1286  */
1287 typedef struct _inline_env_t {
1288         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1289         list_head      calls; /**< The call entry list. */
1290 } inline_env_t;
1291
1292 /**
1293  * Returns the irg called from a Call node. If the irg is not
1294  * known, NULL is returned.
1295  *
1296  * @param call  the call node
1297  */
1298 static ir_graph *get_call_called_irg(ir_node *call) {
1299         ir_node *addr;
1300
1301         addr = get_Call_ptr(call);
1302         if (is_Global(addr)) {
1303                 ir_entity *ent = get_Global_entity(addr);
1304                 return get_entity_irg(ent);
1305         }
1306
1307         return NULL;
1308 }
1309
1310 /**
1311  * Walker: Collect all calls to known graphs inside a graph.
1312  */
1313 static void collect_calls(ir_node *call, void *env) {
1314         (void) env;
1315         if (is_Call(call)) {
1316                 ir_graph *called_irg = get_call_called_irg(call);
1317
1318                 if (called_irg != NULL) {
1319                         /* The Call node calls a locally defined method.  Remember to inline. */
1320                         inline_env_t *ienv  = env;
1321                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1322                         entry->call       = call;
1323                         entry->callee     = called_irg;
1324                         entry->loop_depth = 0;
1325                         entry->benefice   = 0;
1326                         entry->local_adr  = 0;
1327                         entry->all_const  = 0;
1328
1329                         list_add_tail(&entry->list, &ienv->calls);
1330                 }
1331         }
1332 }
1333
1334 /**
1335  * Inlines all small methods at call sites where the called address comes
1336  * from a SymConst node that references the entity representing the called
1337  * method.
1338  * The size argument is a rough measure for the code size of the method:
1339  * Methods where the obstack containing the firm graph is smaller than
1340  * size are inlined.
1341  */
1342 void inline_small_irgs(ir_graph *irg, int size) {
1343         ir_graph *rem = current_ir_graph;
1344         inline_env_t env;
1345         call_entry *entry;
1346
1347         current_ir_graph = irg;
1348         /* Handle graph state */
1349         assert(get_irg_phase_state(irg) != phase_building);
1350         free_callee_info(irg);
1351
1352         /* Find Call nodes to inline.
1353            (We cannot inline during a walk of the graph, as inlining the same
1354            method several times changes the visited flag of the walked graph:
1355            after the first inlining the visited value of the callee equals that of
1356            the caller.  With the next inlining both are increased.) */
1357         obstack_init(&env.obst);
1358         INIT_LIST_HEAD(&env.calls);
1359         irg_walk_graph(irg, NULL, collect_calls, &env);
1360
1361         if (! list_empty(&env.calls)) {
1362                 /* There are calls to inline */
1363                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1364                 collect_phiprojs(irg);
1365
1366                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1367                         ir_graph            *callee = entry->callee;
1368                         irg_inline_property prop    = get_irg_inline_property(callee);
1369
1370                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1371                                 /* do not inline forbidden / weak graphs */
1372                                 continue;
1373                         }
1374
1375                         if (prop >= irg_inline_forced ||
1376                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1377                                 inline_method(entry->call, callee);
1378                         }
1379                 }
1380                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1381         }
1382         obstack_free(&env.obst, NULL);
1383         current_ir_graph = rem;
1384 }
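/*
 * Illustrative sketch (not part of the library): inline_small_irgs() can be
 * applied to every graph of a finished program.  The size limit of 50 below
 * is an arbitrary example value, not a tuned recommendation.
 *
 *     int i, n = get_irp_n_irgs();
 *     for (i = 0; i < n; ++i)
 *         inline_small_irgs(get_irp_irg(i), 50);
 */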
1385
1386 struct inline_small_irgs_pass_t {
1387         ir_graph_pass_t pass;
1388         int            size;
1389 };
1390
1391 /**
1392  * Wrapper to run inline_small_irgs() as a pass.
1393  */
1394 static int inline_small_irgs_wrapper(ir_graph *irg, void *context) {
1395         struct inline_small_irgs_pass_t *pass = context;
1396
1397         inline_small_irgs(irg, pass->size);
1398         return 0;
1399 }
1400
1401 /* create a pass for inline_small_irgs() */
1402 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size) {
1403         struct inline_small_irgs_pass_t *pass =
1404                 XMALLOCZ(struct inline_small_irgs_pass_t);
1405
1406         pass->size = size;
1407         return def_graph_pass_constructor(
1408                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
1409 }
1410
1411 /**
1412  * Environment for inlining irgs.
1413  */
1414 typedef struct {
1415         list_head calls;             /**< List of all call nodes in this graph. */
1416         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1417         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1418         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1419         unsigned  n_nodes_orig;      /**< for statistics */
1420         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1421         unsigned  n_call_nodes_orig; /**< for statistics */
1422         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
1423         unsigned  n_callers_orig;    /**< for statistics */
1424         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1425         unsigned  local_vars:1;      /**< Set, if an inlined function got the address of a local variable. */
1426         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1427 } inline_irg_env;
1428
1429 /**
1430  * Allocate a new environment for inlining.
1431  */
1432 static inline_irg_env *alloc_inline_irg_env(void) {
1433         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1434         INIT_LIST_HEAD(&env->calls);
1435         env->local_weights     = NULL;
1436         env->n_nodes           = -2; /* do not count Start, End */
1437         env->n_blocks          = -2; /* do not count Start, End Block */
1438         env->n_nodes_orig      = -2; /* do not count Start, End */
1439         env->n_call_nodes      = 0;
1440         env->n_call_nodes_orig = 0;
1441         env->n_callers         = 0;
1442         env->n_callers_orig    = 0;
1443         env->got_inline        = 0;
1444         env->local_vars        = 0;
1445         env->recursive         = 0;
1446         return env;
1447 }
1448
1449 typedef struct walker_env {
1450         inline_irg_env *x;     /**< the inline environment */
1451         char ignore_runtime;   /**< the ignore runtime flag */
1452         char ignore_callers;   /**< if set, do not update the callers' data */
1453 } wenv_t;
1454
1455 /**
1456  * post-walker: collect all calls in the inline-environment
1457  * of a graph and sum some statistics.
1458  */
1459 static void collect_calls2(ir_node *call, void *ctx) {
1460         wenv_t         *env = ctx;
1461         inline_irg_env *x = env->x;
1462         ir_opcode      code = get_irn_opcode(call);
1463         ir_graph       *callee;
1464         call_entry     *entry;
1465
1466         /* count meaningful nodes in irg */
1467         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1468                 if (code != iro_Block) {
1469                         ++x->n_nodes;
1470                         ++x->n_nodes_orig;
1471                 } else {
1472                         ++x->n_blocks;
1473                 }
1474         }
1475
1476         if (code != iro_Call) return;
1477
1478         /* check, if it's a runtime call */
1479         if (env->ignore_runtime) {
1480                 ir_node *symc = get_Call_ptr(call);
1481
1482                 if (is_Global(symc)) {
1483                         ir_entity *ent = get_Global_entity(symc);
1484
1485                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1486                                 return;
1487                 }
1488         }
1489
1490         /* collect all call nodes */
1491         ++x->n_call_nodes;
1492         ++x->n_call_nodes_orig;
1493
1494         callee = get_call_called_irg(call);
1495         if (callee != NULL) {
1496                 if (! env->ignore_callers) {
1497                         inline_irg_env *callee_env = get_irg_link(callee);
1498                         /* count all static callers */
1499                         ++callee_env->n_callers;
1500                         ++callee_env->n_callers_orig;
1501                 }
1502                 if (callee == current_ir_graph)
1503                         x->recursive = 1;
1504
1505                 /* link it in the list of possible inlinable entries */
1506                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1507                 entry->call       = call;
1508                 entry->callee     = callee;
1509                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1510                 entry->benefice   = 0;
1511                 entry->local_adr  = 0;
1512                 entry->all_const  = 0;
1513
1514                 list_add_tail(&entry->list, &x->calls);
1515         }
1516 }
1517
1518 /**
1519  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
1520  * hence this irg is a leaf (it calls no other methods).
1521  */
1522 inline static int is_leave(ir_graph *irg) {
1523         inline_irg_env *env = get_irg_link(irg);
1524         return env->n_call_nodes == 0;
1525 }
1526
1527 /**
1528  * Returns TRUE if the number of nodes in the callee is
1529  * smaller than size in the irg's environment.
1530  */
1531 inline static int is_smaller(ir_graph *callee, unsigned size) {
1532         inline_irg_env *env = get_irg_link(callee);
1533         return env->n_nodes < size;
1534 }
1535
1536 /**
1537  * Duplicate a call entry.
1538  *
1539  * @param entry     the original entry to duplicate
1540  * @param new_call  the new call node
1541  * @param loop_depth_delta
1542  *                  delta value for the loop depth
1543  */
1544 static call_entry *duplicate_call_entry(const call_entry *entry,
1545                                         ir_node *new_call, int loop_depth_delta) {
1546         call_entry *nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1547         nentry->call       = new_call;
1548         nentry->callee     = entry->callee;
1549         nentry->benefice   = entry->benefice;
1550         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1551         nentry->local_adr  = entry->local_adr;
1552         nentry->all_const  = entry->all_const;
1553
1554         return nentry;
1555 }
1556
1557 /**
1558  * Append all call nodes of the source environment to the call list of the
1559  * destination environment.
1560  *
1561  * @param dst         destination environment
1562  * @param src         source environment
1563  * @param loop_depth  the loop depth of the call that is replaced by the src list
1564  */
1565 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth) {
1566         call_entry *entry, *nentry;
1567
1568         /* Note that the src list points to Call nodes in the inlined graph, but
1569            we need Call nodes in our graph. Luckily the inliner leaves this information
1570            in the link field. */
1571         list_for_each_entry(call_entry, entry, &src->calls, list) {
1572                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1573                 list_add_tail(&nentry->list, &dst->calls);
1574         }
1575         dst->n_call_nodes += src->n_call_nodes;
1576         dst->n_nodes      += src->n_nodes;
1577 }
1578
1579 /*
1580  * Inlines small leaf methods at call sites where the called address comes
1581  * from a SymConst node that references the entity representing the called
1582  * method.
1583  * The size argument is a rough measure for the code size of the method:
1584  * Methods where the obstack containing the firm graph is smaller than
1585  * size are inlined.
1586  */
1587 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1588                             unsigned size, int ignore_runtime)
1589 {
1590         inline_irg_env   *env;
1591         ir_graph         *irg;
1592         int              i, n_irgs;
1593         ir_graph         *rem;
1594         int              did_inline;
1595         wenv_t           wenv;
1596         call_entry       *entry, *next;
1597         const call_entry *centry;
1598         pmap             *copied_graphs;
1599         pmap_entry       *pm_entry;
1600
1601         rem = current_ir_graph;
1602         obstack_init(&temp_obst);
1603
1604         /* a map for the copied graphs, used to inline recursive calls */
1605         copied_graphs = pmap_create();
1606
1607         /* extend all irgs by a temporary data structure for inlining. */
1608         n_irgs = get_irp_n_irgs();
1609         for (i = 0; i < n_irgs; ++i)
1610                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1611
1612         /* Pre-compute information in temporary data structure. */
1613         wenv.ignore_runtime = ignore_runtime;
1614         wenv.ignore_callers = 0;
1615         for (i = 0; i < n_irgs; ++i) {
1616                 ir_graph *irg = get_irp_irg(i);
1617
1618                 assert(get_irg_phase_state(irg) != phase_building);
1619                 free_callee_info(irg);
1620
1621                 assure_cf_loop(irg);
1622                 wenv.x = get_irg_link(irg);
1623                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1624         }
1625
1626         /* -- and now inline. -- */
1627
1628         /* Inline leaves recursively -- we might construct new leaves. */
1629         do {
1630                 did_inline = 0;
1631
1632                 for (i = 0; i < n_irgs; ++i) {
1633                         ir_node *call;
1634                         int phiproj_computed = 0;
1635
1636                         current_ir_graph = get_irp_irg(i);
1637                         env              = get_irg_link(current_ir_graph);
1638
1639                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1640                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1641                                 ir_graph            *callee;
1642                                 irg_inline_property  prop;
1643
1644                                 if (env->n_nodes > maxsize)
1645                                         break;
1646
1647                                 call   = entry->call;
1648                                 callee = entry->callee;
1649
1650                                 prop = get_irg_inline_property(callee);
1651                                 if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1652                                         /* do not inline forbidden / weak graphs */
1653                                         continue;
1654                                 }
1655
1656                                 if (is_leave(callee) && (
1657                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1658                                         if (!phiproj_computed) {
1659                                                 phiproj_computed = 1;
1660                                                 collect_phiprojs(current_ir_graph);
1661                                         }
1662                                         did_inline = inline_method(call, callee);
1663
1664                                         if (did_inline) {
1665                                                 inline_irg_env *callee_env = get_irg_link(callee);
1666
1667                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1668                                                 phiproj_computed = 0;
1669
1670                                                 /* Do some statistics */
1671                                                 env->got_inline = 1;
1672                                                 --env->n_call_nodes;
1673                                                 env->n_nodes += callee_env->n_nodes;
1674                                                 --callee_env->n_callers;
1675
1676                                                 /* remove this call from the list */
1677                                                 list_del(&entry->list);
1678                                                 continue;
1679                                         }
1680                                 }
1681                         }
1682                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1683                 }
1684         } while (did_inline);
1685
1686         /* inline other small functions. */
1687         for (i = 0; i < n_irgs; ++i) {
1688                 ir_node *call;
1689                 int phiproj_computed = 0;
1690
1691                 current_ir_graph = get_irp_irg(i);
1692                 env              = get_irg_link(current_ir_graph);
1693
1694                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1695
1696                 /* note that the list of possible calls is updated during the process */
1697                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1698                         irg_inline_property prop;
1699                         ir_graph            *callee;
1700                         pmap_entry          *e;
1701
1702                         call   = entry->call;
1703                         callee = entry->callee;
1704
1705                         prop = get_irg_inline_property(callee);
1706                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1707                                 /* do not inline forbidden / weak graphs */
1708                                 continue;
1709                         }
1710
1711                         e = pmap_find(copied_graphs, callee);
1712                         if (e != NULL) {
1713                                 /*
1714                                  * Remap callee if we have a copy.
1715                                  * FIXME: Should we do this only for recursive Calls ?
1716                                  */
1717                                 callee = e->value;
1718                         }
1719
1720                         if (prop >= irg_inline_forced ||
1721                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1722                                 if (current_ir_graph == callee) {
1723                                         /*
1724                                          * Recursive call: we cannot directly inline because we cannot walk
1725                                          * the graph and change it. So we have to make a copy of the graph
1726                                          * first.
1727                                          */
1728
1729                                         inline_irg_env *callee_env;
1730                                         ir_graph       *copy;
1731
1732                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1733
1734                                         /*
1735                                          * No copy yet, create one.
1736                                          * Note that recursive methods are never leaves, so it is sufficient
1737                                          * to test this condition here.
1738                                          */
1739                                         copy = create_irg_copy(callee);
1740
1741                                         /* create_irg_copy() destroys the Proj links, recompute them */
1742                                         phiproj_computed = 0;
1743
1744                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1745
1746                                         /* allocate new environment */
1747                                         callee_env = alloc_inline_irg_env();
1748                                         set_irg_link(copy, callee_env);
1749
1750                                         assure_cf_loop(copy);
1751                                         wenv.x              = callee_env;
1752                                         wenv.ignore_callers = 1;
1753                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1754
1755                                         /*
1756                                          * Enter the entity of the original graph. This is needed
1757                                          * for inline_method(). However, note that ent->irg still points
1758                                          * to callee, NOT to copy.
1759                                          */
1760                                         set_irg_entity(copy, get_irg_entity(callee));
1761
1762                                         pmap_insert(copied_graphs, callee, copy);
1763                                         callee = copy;
1764
1765                                         /* we have only one caller: the original graph */
1766                                         callee_env->n_callers      = 1;
1767                                         callee_env->n_callers_orig = 1;
1768                                 }
1769                                 if (! phiproj_computed) {
1770                                         phiproj_computed = 1;
1771                                         collect_phiprojs(current_ir_graph);
1772                                 }
1773                                 did_inline = inline_method(call, callee);
1774                                 if (did_inline) {
1775                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1776
1777                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1778                                         phiproj_computed = 0;
1779
1780                                         /* callee was inlined. Append its call list. */
1781                                         env->got_inline = 1;
1782                                         --env->n_call_nodes;
1783                                         append_call_list(env, callee_env, entry->loop_depth);
1784                                         --callee_env->n_callers;
1785
1786                                         /* after we have inlined callee, all called methods inside callee
1787                                            are now called once more */
1788                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1789                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1790                                                 ++penv->n_callers;
1791                                         }
1792
1793                                         /* remove this call from the list */
1794                                         list_del(&entry->list);
1795                                         continue;
1796                                 }
1797                         }
1798                 }
1799                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1800         }
1801
1802         for (i = 0; i < n_irgs; ++i) {
1803                 irg = get_irp_irg(i);
1804                 env = get_irg_link(irg);
1805
1806                 if (env->got_inline) {
1807                         optimize_graph_df(irg);
1808                         optimize_cf(irg);
1809                 }
1810                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1811                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1812                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1813                         env->n_callers_orig, env->n_callers,
1814                         get_entity_name(get_irg_entity(irg))));
1815                 }
1816         }
1817
1818         /* kill the copied graphs: we don't need them anymore */
1819         foreach_pmap(copied_graphs, pm_entry) {
1820                 ir_graph *copy = pm_entry->value;
1821
1822                 /* reset the entity, otherwise it will be deleted in the next step ... */
1823                 set_irg_entity(copy, NULL);
1824                 free_ir_graph(copy);
1825         }
1826         pmap_destroy(copied_graphs);
1827
1828         obstack_free(&temp_obst, NULL);
1829         current_ir_graph = rem;
1830 }
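/*
 * Illustrative sketch (the parameter values are arbitrary examples, not tuned
 * recommendations): a whole-program run of the leave inliner with a caller
 * growth limit of 750 nodes, a leave size limit of 16, a general size limit
 * of 10 and runtime calls ignored.
 *
 *     inline_leave_functions(750, 16, 10, 1);
 */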
1831
1832 struct inline_leave_functions_pass_t {
1833         ir_prog_pass_t pass;
1834         unsigned       maxsize;
1835         unsigned       leavesize;
1836         unsigned       size;
1837         int            ignore_runtime;
1838 };
1839
1840 /**
1841  * Wrapper to run inline_leave_functions() as a ir_prog pass.
1842  */
1843 static int inline_leave_functions_wrapper(ir_prog *irp, void *context) {
1844         struct inline_leave_functions_pass_t *pass = context;
1845
1846         (void)irp;
1847         inline_leave_functions(
1848                 pass->maxsize, pass->leavesize,
1849                 pass->size, pass->ignore_runtime);
1850         return 0;
1851 }
1852
1853 /* create a pass for inline_leave_functions() */
1854 ir_prog_pass_t *inline_leave_functions_pass(
1855         const char *name, unsigned maxsize, unsigned leavesize,
1856         unsigned size, int ignore_runtime) {
1857         struct inline_leave_functions_pass_t *pass =
1858                 XMALLOCZ(struct inline_leave_functions_pass_t);
1859
1860         pass->maxsize        = maxsize;
1861         pass->leavesize      = leavesize;
1862         pass->size           = size;
1863         pass->ignore_runtime = ignore_runtime;
1864
1865         return def_prog_pass_constructor(
1866                 &pass->pass,
1867                 name ? name : "inline_leave_functions",
1868                 inline_leave_functions_wrapper);
1869 }
1870
1871 /**
1872  * Calculate the weight for transmitting the address of a local variable by inspecting all users of the argument.
1873  */
1874 static unsigned calc_method_local_weight(ir_node *arg) {
1875         int      i, j, k;
1876         unsigned v, weight = 0;
1877
1878         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1879                 ir_node *succ = get_irn_out(arg, i);
1880
1881                 switch (get_irn_opcode(succ)) {
1882                 case iro_Load:
1883                 case iro_Store:
1884                         /* Loads and Stores can be removed */
1885                         weight += 3;
1886                         break;
1887                 case iro_Sel:
1888                         /* check if all args are constant */
1889                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1890                                 ir_node *idx = get_Sel_index(succ, j);
1891                                 if (! is_Const(idx))
1892                                         return 0;
1893                         }
1894                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1895                            some unsupported node. */
1896                         v = calc_method_local_weight(succ);
1897                         if (v == 0)
1898                                 return 0;
1899                         /* we can kill one Sel with constant indexes, this is cheap */
1900                         weight += v + 1;
1901                         break;
1902                 case iro_Id:
1903                         /* when looking backward we might find Id nodes */
1904                         weight += calc_method_local_weight(succ);
1905                         break;
1906                 case iro_Tuple:
1907                         /* unoptimized tuple */
1908                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1909                                 ir_node *pred = get_Tuple_pred(succ, j);
1910                                 if (pred == arg) {
1911                                         /* look for Proj(j) */
1912                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1913                                                 ir_node *succ_succ = get_irn_out(succ, k);
1914                                                 if (is_Proj(succ_succ)) {
1915                                                         if (get_Proj_proj(succ_succ) == j) {
1916                                                                 /* found */
1917                                                                 weight += calc_method_local_weight(succ_succ);
1918                                                         }
1919                                                 } else {
1920                                                         /* this should NOT happen */
1921                                                         return 0;
1922                                                 }
1923                                         }
1924                                 }
1925                         }
1926                         break;
1927                 default:
1928                         /* any other node: unsupported yet or bad. */
1929                         return 0;
1930                 }
1931         }
1932         return weight;
1933 }
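/*
 * Worked example for calc_method_local_weight() above (illustrative): if the
 * address of a parameter is used by two Loads and one Store, the weight
 * accumulates 3 + 3 + 3 = 9; a Sel with constant indexes whose result feeds a
 * single Load contributes its recursive weight plus one, i.e. 3 + 1 = 4.  Any
 * unsupported user makes the whole weight collapse to 0.
 */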
1934
1935 /**
1936  * Calculate the parameter weights for transmitting the address of a local variable.
1937  */
1938 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1939         ir_entity *ent = get_irg_entity(irg);
1940         ir_type  *mtp;
1941         int      nparams, i, proj_nr;
1942         ir_node  *irg_args, *arg;
1943
1944         mtp      = get_entity_type(ent);
1945         nparams  = get_method_n_params(mtp);
1946
1947         /* allocate a new array; its mere existence serves as the 'already analysed' flag */
1948         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1949
1950         /* If the method has no parameters, we have nothing to do. */
1951         if (nparams <= 0)
1952                 return;
1953
1954         assure_irg_outs(irg);
1955         irg_args = get_irg_args(irg);
1956         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1957                 arg     = get_irn_out(irg_args, i);
1958                 proj_nr = get_Proj_proj(arg);
1959                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1960         }
1961 }
1962
1963 /**
1964  * Calculate the benefice for transmitting a local variable address.
1965  * After inlining, the local variable might be transformed into an
1966  * SSA variable by scalar_replacement().
1967  */
1968 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1969         inline_irg_env *env = get_irg_link(callee);
1970
1971         if (env->local_weights != NULL) {
1972                 if (pos < ARR_LEN(env->local_weights))
1973                         return env->local_weights[pos];
1974                 return 0;
1975         }
1976
1977         analyze_irg_local_weights(env, callee);
1978
1979         if (pos < ARR_LEN(env->local_weights))
1980                 return env->local_weights[pos];
1981         return 0;
1982 }
1983
1984 /**
1985  * Calculate a benefice value for inlining the given call.
1986  *
1987  * @param call       the call node we have to inspect
1988  * @param callee     the called graph
1989  */
1990 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1991 {
1992         ir_node   *call = entry->call;
1993         ir_entity *ent  = get_irg_entity(callee);
1994         ir_node   *frame_ptr;
1995         ir_type   *mtp;
1996         int       weight = 0;
1997         int       i, n_params, all_const;
1998         unsigned  cc, v;
1999         irg_inline_property prop;
2000
2001         inline_irg_env *callee_env;
2002
2003         prop = get_irg_inline_property(callee);
2004         if (prop == irg_inline_forbidden) {
2005                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
2006                     call, callee));
2007                 return entry->benefice = INT_MIN;
2008         }
2009
2010         if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
2011                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
2012                     call, callee));
2013                 return entry->benefice = INT_MIN;
2014         }
2015
2016         /* costs for every passed parameter */
2017         n_params = get_Call_n_params(call);
2018         mtp      = get_entity_type(ent);
2019         cc       = get_method_calling_convention(mtp);
2020         if (cc & cc_reg_param) {
2021                 /* register parameter, smaller costs for register parameters */
2022                 int max_regs = cc & ~cc_bits;
2023
2024                 if (max_regs < n_params)
2025                         weight += max_regs * 2 + (n_params - max_regs) * 5;
2026                 else
2027                         weight += n_params * 2;
2028         } else {
2029                 /* parameters are passed on the stack */
2030                 weight += 5 * n_params;
2031         }
2032
2033         /* constant parameters improve the benefice */
2034         frame_ptr = get_irg_frame(current_ir_graph);
2035         all_const = 1;
2036         for (i = 0; i < n_params; ++i) {
2037                 ir_node *param = get_Call_param(call, i);
2038
2039                 if (is_Const(param)) {
2040                         weight += get_method_param_weight(ent, i);
2041                 } else {
2042                         all_const = 0;
2043                         if (is_SymConst(param))
2044                                 weight += get_method_param_weight(ent, i);
2045                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
2046                                 /*
2047                                  * An address of a local variable is transmitted. After
2048                                  * inlining, scalar_replacement might be able to remove the
2049                                  * local variable, so honor this.
2050                                  */
2051                                 v = get_method_local_adress_weight(callee, i);
2052                                 weight += v;
2053                                 if (v > 0)
2054                                         entry->local_adr = 1;
2055                         }
2056                 }
2057         }
2058         entry->all_const = all_const;
2059
2060         callee_env = get_irg_link(callee);
2061         if (callee_env->n_callers == 1 &&
2062             callee != current_ir_graph &&
2063                 get_entity_visibility(ent) == visibility_local) {
2064                 weight += 700;
2065         }
2066
2067         /* give a bonus for functions with one block */
2068         if (callee_env->n_blocks == 1)
2069                 weight = weight * 3 / 2;
2070
2071         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
2072         if (callee_env->n_nodes < 30 && !callee_env->recursive)
2073                 weight += 2000;
2074
2075         /* and finally for leaf functions: they do not increase the register pressure
2076            because of callee-save registers */
2077         if (callee_env->n_call_nodes == 0)
2078                 weight += 400;
2079
2080         /* it's important to inline inner loops first */
2081         if (entry->loop_depth > 30)
2082                 weight += 30 * 1024;
2083         else
2084                 weight += entry->loop_depth * 1024;
2085
2086         /*
2087          * All arguments constant is probably a good sign, give an extra bonus
2088          */
2089         if (all_const)
2090                 weight += 1024;
2091
2092         return entry->benefice = weight;
2093 }
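/*
 * Worked example for calc_inline_benefice() above (illustrative): a call with
 * three constant, stack-passed parameters to a local, single-block,
 * non-recursive callee of fewer than 30 nodes that has exactly one caller and
 * makes no calls itself starts at 3 * 5 = 15 for the parameters, adds the
 * per-parameter weights, gets +700 for the single local caller, is scaled by
 * 3/2 for the single block, gains +2000 for being small and non-recursive,
 * +400 for being a leaf, +1024 per loop level of the call site and a final
 * +1024 because all arguments are constant.
 */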
2094
2095 static ir_graph **irgs;
2096 static int      last_irg;
2097
2098 /**
2099  * Callgraph walker, collect all visited graphs.
2100  */
2101 static void callgraph_walker(ir_graph *irg, void *data) {
2102         (void) data;
2103         irgs[last_irg++] = irg;
2104 }
2105
2106 /**
2107  * Creates an inline order for all graphs.
2108  *
2109  * @return the list of graphs.
2110  */
2111 static ir_graph **create_irg_list(void) {
2112         ir_entity **free_methods;
2113         int       arr_len;
2114         int       n_irgs = get_irp_n_irgs();
2115
2116         cgana(&arr_len, &free_methods);
2117         xfree(free_methods);
2118
2119         compute_callgraph();
2120
2121         last_irg = 0;
2122         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2123
2124         callgraph_walk(NULL, callgraph_walker, NULL);
2125         assert(n_irgs == last_irg);
2126
2127         return irgs;
2128 }
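/*
 * Note (assumption about the callgraph_walk() interface): callgraph_walker is
 * registered as the post-walker above, so callees are listed before their
 * callers and inline_functions() processes the graphs bottom-up in the call
 * graph.
 */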
2129
2130 /**
2131  * Push a call onto the priority list if its benefice is big enough.
2132  *
2133  * @param pqueue   the priority queue of calls
2134  * @param call     the call entry
2135  * @param inline_threshold
2136  *                 the threshold value
2137  */
2138 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2139                             int inline_threshold)
2140 {
2141         ir_graph            *callee  = call->callee;
2142         irg_inline_property prop     = get_irg_inline_property(callee);
2143         int                 benefice = calc_inline_benefice(call, callee);
2144
2145         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2146             get_irn_irg(call->call), call->call, callee, benefice));
2147
2148         if (prop < irg_inline_forced && benefice < inline_threshold) {
2149                 return;
2150         }
2151
2152         pqueue_put(pqueue, call, benefice);
2153 }
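/*
 * Note (assumption about the pqueue semantics): the benefice is used as the
 * priority, so pqueue_pop_front() in inline_into() below hands out the most
 * beneficial remaining call first.
 */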
2154
2155 /**
2156  * Try to inline calls into a graph.
2157  *
2158  * @param irg      the graph into which we inline
2159  * @param maxsize  do NOT inline if the size of irg gets
2160  *                 bigger than this amount
2161  * @param inline_threshold
2162  *                 threshold value for inline decision
2163  * @param copied_graphs
2164  *                 map containing copies of recursive graphs
2165  */
2166 static void inline_into(ir_graph *irg, unsigned maxsize,
2167                         int inline_threshold, pmap *copied_graphs)
2168 {
2169         int            phiproj_computed = 0;
2170         inline_irg_env *env = get_irg_link(irg);
2171         call_entry     *curr_call;
2172         wenv_t         wenv;
2173         pqueue_t       *pqueue;
2174
2175         if (env->n_call_nodes == 0)
2176                 return;
2177
2178         if (env->n_nodes > maxsize) {
2179                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2180                 return;
2181         }
2182
2183         current_ir_graph = irg;
2184         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2185
2186         /* put this graph's calls into the pqueue */
2187         pqueue = new_pqueue();
2188
2189         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2190                 assert(is_Call(curr_call->call));
2191                 maybe_push_call(pqueue, curr_call, inline_threshold);
2192         }
2193
2194         /* note that the list of possible calls is updated during the process */
2195         while (!pqueue_empty(pqueue)) {
2196                 int                 did_inline;
2197                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2198                 ir_graph            *callee     = curr_call->callee;
2199                 ir_node             *call_node  = curr_call->call;
2200                 inline_irg_env      *callee_env = get_irg_link(callee);
2201                 irg_inline_property prop        = get_irg_inline_property(callee);
2202                 int                 loop_depth;
2203                 const call_entry    *centry;
2204                 pmap_entry          *e;
2205
2206                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2207                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2208                                                 env->n_nodes, callee, callee_env->n_nodes));
2209                         continue;
2210                 }
2211
2212                 e = pmap_find(copied_graphs, callee);
2213                 if (e != NULL) {
2214                         int benefice = curr_call->benefice;
2215                         /*
2216                          * Reduce the weight for recursive functions if not all arguments are constant;
2217                          * inlining recursive functions is rarely good.
2218                          */
2219                         if (!curr_call->all_const)
2220                                 benefice -= 2000;
2221                         if (benefice < inline_threshold)
2222                                 continue;
2223
2224                         /*
2225                          * Remap callee if we have a copy.
2226                          */
2227                         callee     = e->value;
2228                         callee_env = get_irg_link(callee);
2229                 }
2230
2231                 if (current_ir_graph == callee) {
2232                         /*
2233                          * Recursive call: we cannot directly inline because we cannot
2234                          * walk the graph and change it. So we have to make a copy of
2235                          * the graph first.
2236                          */
2237                         int benefice = curr_call->benefice;
2238                         ir_graph *copy;
2239
2240                         /*
2241                          * Reduce the weight for recursive functions if not all arguments are constant;
2242                          * inlining recursive functions is rarely good.
2243                          */
2244                         if (!curr_call->all_const)
2245                                 benefice -= 2000;
2246                         if (benefice < inline_threshold)
2247                                 continue;
2248
2249                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2250
2251                         /*
2252                          * No copy yet, create one.
2253                          * Note that recursive methods are never leaves, so it is
2254                          * sufficient to test this condition here.
2255                          */
2256                         copy = create_irg_copy(callee);
2257
2258                         /* create_irg_copy() destroys the Proj links, recompute them */
2259                         phiproj_computed = 0;
2260
2261                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2262
2263                         /* allocate a new environment */
2264                         callee_env = alloc_inline_irg_env();
2265                         set_irg_link(copy, callee_env);
2266
2267                         assure_cf_loop(copy);
2268                         wenv.x              = callee_env;
2269                         wenv.ignore_callers = 1;
2270                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2271
2272                         /*
2273                          * Enter the entity of the original graph. This is needed
2274                          * for inline_method(). However, note that ent->irg still points
2275                          * to callee, NOT to copy.
2276                          */
2277                         set_irg_entity(copy, get_irg_entity(callee));
2278
2279                         pmap_insert(copied_graphs, callee, copy);
2280                         callee = copy;
2281
2282                         /* we have only one caller: the original graph */
2283                         callee_env->n_callers      = 1;
2284                         callee_env->n_callers_orig = 1;
2285                 }
2286                 if (! phiproj_computed) {
2287                         phiproj_computed = 1;
2288                         collect_phiprojs(current_ir_graph);
2289                 }
2290                 did_inline = inline_method(call_node, callee);
2291                 if (!did_inline)
2292                         continue;
2293
2294                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2295                 phiproj_computed = 0;
2296
2297                 /* remove it from the caller list */
2298                 list_del(&curr_call->list);
2299
2300                 /* callee was inlined. Append its call list. */
2301                 env->got_inline = 1;
2302                 if (curr_call->local_adr)
2303                         env->local_vars = 1;
2304                 --env->n_call_nodes;
2305
2306                 /* we just generate a bunch of new calls */
2307                 loop_depth = curr_call->loop_depth;
2308                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2309                         inline_irg_env *penv = get_irg_link(centry->callee);
2310                         ir_node        *new_call;
2311                         call_entry     *new_entry;
2312
2313                         /* after we have inlined callee, all called methods inside
2314                          * callee are now called once more */
2315                         ++penv->n_callers;
2316
2317                         /* Note that the src list points to Call nodes in the inlined graph,
2318                          * but we need Call nodes in our graph. Luckily the inliner leaves
2319                          * this information in the link field. */
2320                         new_call = get_irn_link(centry->call);
2321                         assert(is_Call(new_call));
2322
2323                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2324                         list_add_tail(&new_entry->list, &env->calls);
2325                         maybe_push_call(pqueue, new_entry, inline_threshold);
2326                 }
2327
2328                 env->n_call_nodes += callee_env->n_call_nodes;
2329                 env->n_nodes += callee_env->n_nodes;
2330                 --callee_env->n_callers;
2331         }
2332         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2333         del_pqueue(pqueue);
2334 }
2335
2336 /*
2337  * Heuristic inliner. Calculates a benefice value for every call and inlines
2338  * those calls with a value higher than the threshold.
2339  */
2340 void inline_functions(unsigned maxsize, int inline_threshold) {
2341         inline_irg_env   *env;
2342         int              i, n_irgs;
2343         ir_graph         *rem;
2344         wenv_t           wenv;
2345         pmap             *copied_graphs;
2346         pmap_entry       *pm_entry;
2347         ir_graph         **irgs;
2348
2349         rem = current_ir_graph;
2350         obstack_init(&temp_obst);
2351
2352         irgs = create_irg_list();
2353
2354         /* a map for the copied graphs, used to inline recursive calls */
2355         copied_graphs = pmap_create();
2356
2357         /* extend all irgs by a temporary data structure for inlining. */
2358         n_irgs = get_irp_n_irgs();
2359         for (i = 0; i < n_irgs; ++i)
2360                 set_irg_link(irgs[i], alloc_inline_irg_env());
2361
2362         /* Pre-compute information in temporary data structure. */
2363         wenv.ignore_runtime = 0;
2364         wenv.ignore_callers = 0;
2365         for (i = 0; i < n_irgs; ++i) {
2366                 ir_graph *irg = irgs[i];
2367
2368                 free_callee_info(irg);
2369
2370                 wenv.x = get_irg_link(irg);
2371                 assure_cf_loop(irg);
2372                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2373         }
2374
2375         /* -- and now inline. -- */
2376         for (i = 0; i < n_irgs; ++i) {
2377                 ir_graph *irg = irgs[i];
2378
2379                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2380         }
2381
2382         for (i = 0; i < n_irgs; ++i) {
2383                 ir_graph *irg = irgs[i];
2384
2385                 env = get_irg_link(irg);
2386                 if (env->got_inline) {
2387                         /* this irg got calls inlined: optimize it */
2388                         if (get_opt_combo()) {
2389                                 if (env->local_vars) {
2390                                         scalar_replacement_opt(irg);
2391                                 }
2392                                 combo(irg);
2393                         } else {
2394                                 if (env->local_vars) {
2395                                         if (scalar_replacement_opt(irg)) {
2396                                                 optimize_graph_df(irg);
2397                                         }
2398                                 }
2399                                 optimize_cf(irg);
2400                         }
2401                 }
2402                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2403                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2404                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2405                         env->n_callers_orig, env->n_callers,
2406                         get_entity_name(get_irg_entity(irg))));
2407                 }
2408         }
2409
2410         /* kill the copied graphs: we don't need them anymore */
2411         foreach_pmap(copied_graphs, pm_entry) {
2412                 ir_graph *copy = pm_entry->value;
2413
2414                 /* reset the entity, otherwise it will be deleted in the next step ... */
2415                 set_irg_entity(copy, NULL);
2416                 free_ir_graph(copy);
2417         }
2418         pmap_destroy(copied_graphs);
2419
2420         xfree(irgs);
2421
2422         obstack_free(&temp_obst, NULL);
2423         current_ir_graph = rem;
2424 }
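/*
 * Illustrative sketch (the values are arbitrary examples, not recommended
 * settings): a direct whole-program run of the heuristic inliner with a
 * 750-node growth limit per graph and a benefice threshold of 0.
 *
 *     inline_functions(750, 0);
 */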
2425
2426 struct inline_functions_pass_t {
2427         ir_prog_pass_t pass;
2428         unsigned       maxsize;
2429         int            inline_threshold;
2430 };
2431
2432 /**
2433  * Wrapper to run inline_functions() as a ir_prog pass.
2434  */
2435 static int inline_functions_wrapper(ir_prog *irp, void *context) {
2436         struct inline_functions_pass_t *pass = context;
2437
2438         (void)irp;
2439         inline_functions(pass->maxsize, pass->inline_threshold);
2440         return 0;
2441 }
2442
2443 /* create a ir_prog pass for inline_functions */
2444 ir_prog_pass_t *inline_functions_pass(
2445           const char *name, unsigned maxsize, int inline_threshold) {
2446         struct inline_functions_pass_t *pass =
2447                 XMALLOCZ(struct inline_functions_pass_t);
2448
2449         pass->maxsize          = maxsize;
2450         pass->inline_threshold = inline_threshold;
2451
2452         return def_prog_pass_constructor(
2453                 &pass->pass, name ? name : "inline_functions",
2454                 inline_functions_wrapper);
2455 }
2456
2457 void firm_init_inline(void) {
2458         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2459 }