nsz Git - libfirm/blob - ir/opt/opt_inline.c

   1 /*
   2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief    Dead node elimination and Procedure Inlining.
  23  * @author   Michael Beck, Goetz Lindenmaier
  24  * @version  $Id$
  25  */
  26 #include "config.h"
  27
  28 #include <limits.h>
  29 #include <assert.h>
  30
  31 #include "irnode_t.h"
  32 #include "irgraph_t.h"
  33 #include "irprog_t.h"
  34
  35 #include "iroptimize.h"
  36 #include "ircons_t.h"
  37 #include "iropt_t.h"
  38 #include "irgopt.h"
  39 #include "irgmod.h"
  40 #include "irgwalk.h"
  41
  42 #include "array_t.h"
  43 #include "list.h"
  44 #include "pset.h"
  45 #include "pmap.h"
  46 #include "pdeq.h"
  47 #include "xmalloc.h"
  48 #include "pqueue.h"
  49
  50 #include "irouts.h"
  51 #include "irloop_t.h"
  52 #include "irbackedge_t.h"
  53 #include "opt_inline_t.h"
  54 #include "cgana.h"
  55 #include "trouts.h"
  56 #include "error.h"
  57
  58 #include "analyze_irg_args.h"
  59 #include "iredges_t.h"
  60 #include "irflag_t.h"
  61 #include "irhooks.h"
  62 #include "irtools.h"
  63 #include "iropt_dbg.h"
  64
  65 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  66
  67 /*------------------------------------------------------------------*/
  68 /* Routines for dead node elimination / copying garbage collection  */
  69 /* of the obstack.                                                  */
  70 /*------------------------------------------------------------------*/
  71
  72 /**
  73  * Remember the new node in the old node by using a field all nodes have.
  74  */
  75 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
  76
  77 /**
  78  * Get this new node, before the old node is forgotten.
  79  */
  80 #define get_new_node(oldn) get_irn_link(oldn)
  81
  82 /**
  83  * Check if a new node was set.
  84  */
  85 #define has_new_node(n) (get_new_node(n) != NULL)
  86
  87 /**
  88  * We use the block_visited flag to mark that we have computed the
  89  * number of useful predecessors for this block.
  90  * Further we encode the new arity in this flag in the old blocks.
  91  * Remembering the arity is useful, as it saves a lot of pointer
  92  * accesses.  This function is called for all Phi and Block nodes
  93  * in a Block.
  94  */
  95 static inline int
  96 compute_new_arity(ir_node *b) {
  97         int i, res, irn_arity;
  98         int irg_v, block_v;
  99
 100         irg_v = get_irg_block_visited(current_ir_graph);
 101         block_v = get_Block_block_visited(b);
 102         if (block_v >= irg_v) {
 103                 /* we computed the number of preds for this block and saved it in the
 104                    block_v flag */
 105                 return block_v - irg_v;
 106         } else {
 107                 /* compute the number of good predecessors */
 108                 res = irn_arity = get_irn_arity(b);
 109                 for (i = 0; i < irn_arity; i++)
 110                         if (is_Bad(get_irn_n(b, i))) res--;
 111                         /* save it in the flag. */
 112                         set_Block_block_visited(b, irg_v + res);
 113                         return res;
 114         }
 115 }
 116
 117 /**
 118  * Copies the node to the new obstack. The Ins of the new node point to
 119  * the predecessors on the old obstack.  For block/phi nodes not all
 120  * predecessors might be copied.  n->link points to the new node.
 121  * For Phi and Block nodes the function allocates in-arrays with an arity
 122  * only for useful predecessors.  The arity is determined by counting
 123  * the non-bad predecessors of the block.
 124  *
 125  * @param n    The node to be copied
 126  * @param env  if non-NULL, the node number attribute will be copied to the new node
 127  *
 128  * Note: Also used for loop unrolling.
 129  */
 130 static void copy_node(ir_node *n, void *env) {
 131         ir_node *nn, *block;
 132         int new_arity;
 133         ir_op *op = get_irn_op(n);
 134         (void) env;
 135
 136         if (op == op_Bad) {
 137                 /* node copied already */
 138                 return;
 139         } else if (op == op_Block) {
 140                 block = NULL;
 141                 new_arity = compute_new_arity(n);
 142                 n->attr.block.graph_arr = NULL;
 143         } else {
 144                 block = get_nodes_block(n);
 145                 if (op == op_Phi) {
 146                         new_arity = compute_new_arity(block);
 147                 } else {
 148                         new_arity = get_irn_arity(n);
 149                 }
 150         }
 151         nn = new_ir_node(get_irn_dbg_info(n),
 152                 current_ir_graph,
 153                 block,
 154                 op,
 155                 get_irn_mode(n),
 156                 new_arity,
 157                 get_irn_in(n) + 1);
 158         /* Copy the attributes.  These might point to additional data.  If this
 159            was allocated on the old obstack the pointers now are dangling.  This
 160            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
 161         if (op == op_Block) {
 162                 /* we cannot allow blocks WITHOUT macroblock input */
 163                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
 164         }
 165         copy_node_attr(n, nn);
 166
 167         if (env != NULL) {
 168                 /* for easier debugging, we want to copy the node numbers too */
 169                 nn->node_nr = n->node_nr;
 170         }
 171
 172         set_new_node(n, nn);
 173         hook_dead_node_elim_subst(current_ir_graph, n, nn);
 174 }
 175
 176 /**
 177  * Copies new predecessors of old node to new node remembered in link.
 178  * Spare the Bad predecessors of Phi and Block nodes.
 179  */
 180 static void copy_preds(ir_node *n, void *env) {
 181         ir_node *nn, *block;
 182         int i, j, irn_arity;
 183         (void) env;
 184
 185         nn = get_new_node(n);
 186
 187         if (is_Block(n)) {
 188                 /* copy the macro block header */
 189                 ir_node *mbh = get_Block_MacroBlock(n);
 190
 191                 if (mbh == n) {
 192                         /* this block is a macroblock header */
 193                         set_Block_MacroBlock(nn, nn);
 194                 } else {
 195                         /* get the macro block header */
 196                         ir_node *nmbh = get_new_node(mbh);
 197                         assert(nmbh != NULL);
 198                         set_Block_MacroBlock(nn, nmbh);
 199                 }
 200
 201                 /* Don't copy Bad nodes. */
 202                 j = 0;
 203                 irn_arity = get_irn_arity(n);
 204                 for (i = 0; i < irn_arity; i++) {
 205                         if (! is_Bad(get_irn_n(n, i))) {
 206                                 ir_node *pred = get_irn_n(n, i);
 207                                 set_irn_n(nn, j, get_new_node(pred));
 208                                 j++;
 209                         }
 210                 }
 211                 /* repair the block visited flag from above misuse. Repair it in both
 212                    graphs so that the old one can still be used. */
 213                 set_Block_block_visited(nn, 0);
 214                 set_Block_block_visited(n, 0);
 215                 /* Local optimization could not merge two subsequent blocks if
 216                    in array contained Bads.  Now it's possible.
 217                    We don't call optimize_in_place as it requires
 218                    that the fields in ir_graph are set properly. */
 219                 if (!has_Block_label(nn) &&
 220                     get_opt_control_flow_straightening() &&
 221                     get_Block_n_cfgpreds(nn) == 1 &&
 222                     is_Jmp(get_Block_cfgpred(nn, 0))) {
 223                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
 224                         if (nn == old) {
 225                                 /* Jmp jumps into the block it is in -- deal self cycle. */
 226                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
 227                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
 228                         } else {
 229                                 exchange(nn, old);
 230                         }
 231                 }
 232         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
 233                 /* Don't copy node if corresponding predecessor in block is Bad.
 234                    The Block itself should not be Bad. */
 235                 block = get_nodes_block(n);
 236                 set_nodes_block(nn, get_new_node(block));
 237                 j = 0;
 238                 irn_arity = get_irn_arity(n);
 239                 for (i = 0; i < irn_arity; i++) {
 240                         if (! is_Bad(get_irn_n(block, i))) {
 241                                 ir_node *pred = get_irn_n(n, i);
 242                                 set_irn_n(nn, j, get_new_node(pred));
 243                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
 244                                 j++;
 245                         }
 246                 }
 247                 /* If the pre walker reached this Phi after the post walker visited the
 248                    block block_visited is > 0. */
 249                 set_Block_block_visited(get_nodes_block(n), 0);
 250                 /* Compacting the Phi's ins might generate Phis with only one
 251                    predecessor. */
 252                 if (get_irn_arity(nn) == 1)
 253                         exchange(nn, get_irn_n(nn, 0));
 254         } else {
 255                 irn_arity = get_irn_arity(n);
 256                 for (i = -1; i < irn_arity; i++)
 257                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
 258         }
 259         /* Now the new node is complete.  We can add it to the hash table for CSE.
 260            @@@ inlining aborts if we identify End. Why? */
 261         if (!is_End(nn))
 262                 add_identities(current_ir_graph->value_table, nn);
 263 }
 264
 265 /**
 266  * Copies the graph recursively, compacts the keep-alives of the end node.
 267  *
 268  * @param irg           the graph to be copied
 269  * @param copy_node_nr  If non-zero, the node number will be copied
 270  */
 271 static void copy_graph(ir_graph *irg, int copy_node_nr) {
 272         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
 273         ir_node *ka;      /* keep alive */
 274         int i, irn_arity;
 275         unsigned long vfl;
 276
 277         /* Some nodes must be copied by hand, sigh */
 278         vfl = get_irg_visited(irg);
 279         set_irg_visited(irg, vfl + 1);
 280
 281         oe = get_irg_end(irg);
 282         mark_irn_visited(oe);
 283         /* copy the end node by hand, allocate dynamic in array! */
 284         ne = new_ir_node(get_irn_dbg_info(oe),
 285                 irg,
 286                 NULL,
 287                 op_End,
 288                 mode_X,
 289                 -1,
 290                 NULL);
 291         /* Copy the attributes.  Well, there might be some in the future... */
 292         copy_node_attr(oe, ne);
 293         set_new_node(oe, ne);
 294
 295         /* copy the Bad node */
 296         ob = get_irg_bad(irg);
 297         mark_irn_visited(ob);
 298         nb = new_ir_node(get_irn_dbg_info(ob),
 299                 irg,
 300                 NULL,
 301                 op_Bad,
 302                 mode_T,
 303                 0,
 304                 NULL);
 305         copy_node_attr(ob, nb);
 306         set_new_node(ob, nb);
 307
 308         /* copy the NoMem node */
 309         om = get_irg_no_mem(irg);
 310         mark_irn_visited(om);
 311         nm = new_ir_node(get_irn_dbg_info(om),
 312                 irg,
 313                 NULL,
 314                 op_NoMem,
 315                 mode_M,
 316                 0,
 317                 NULL);
 318         copy_node_attr(om, nm);
 319         set_new_node(om, nm);
 320
 321         /* copy the live nodes */
 322         set_irg_visited(irg, vfl);
 323         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
 324
 325         /* Note: from yet, the visited flag of the graph is equal to vfl + 1 */
 326
 327         /* visit the anchors as well */
 328         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
 329                 ir_node *n = get_irg_anchor(irg, i);
 330
 331                 if (n && (get_irn_visited(n) <= vfl)) {
 332                         set_irg_visited(irg, vfl);
 333                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
 334                 }
 335         }
 336
 337         /* copy_preds for the end node ... */
 338         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
 339
 340         /*- ... and now the keep alives. -*/
 341         /* First pick the not marked block nodes and walk them.  We must pick these
 342            first as else we will oversee blocks reachable from Phis. */
 343         irn_arity = get_End_n_keepalives(oe);
 344         for (i = 0; i < irn_arity; i++) {
 345                 ka = get_End_keepalive(oe, i);
 346                 if (is_Block(ka)) {
 347                         if (get_irn_visited(ka) <= vfl) {
 348                                 /* We must keep the block alive and copy everything reachable */
 349                                 set_irg_visited(irg, vfl);
 350                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
 351                         }
 352                         add_End_keepalive(ne, get_new_node(ka));
 353                 }
 354         }
 355
 356         /* Now pick other nodes.  Here we will keep all! */
 357         irn_arity = get_End_n_keepalives(oe);
 358         for (i = 0; i < irn_arity; i++) {
 359                 ka = get_End_keepalive(oe, i);
 360                 if (!is_Block(ka)) {
 361                         if (get_irn_visited(ka) <= vfl) {
 362                                 /* We didn't copy the node yet.  */
 363                                 set_irg_visited(irg, vfl);
 364                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
 365                         }
 366                         add_End_keepalive(ne, get_new_node(ka));
 367                 }
 368         }
 369
 370         /* start block sometimes only reached after keep alives */
 371         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
 372         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
 373 }
 374
 375 /**
 376  * Copies the graph reachable from current_ir_graph->end to the obstack
 377  * in current_ir_graph and fixes the environment.
 378  * Then fixes the fields in current_ir_graph containing nodes of the
 379  * graph.
 380  *
 381  * @param copy_node_nr  If non-zero, the node number will be copied
 382  */
 383 static void
 384 copy_graph_env(int copy_node_nr) {
 385         ir_graph *irg = current_ir_graph;
 386         ir_node *old_end, *new_anchor;
 387         int i;
 388
 389         /* remove end_except and end_reg nodes */
 390         old_end = get_irg_end(irg);
 391         set_irg_end_except (irg, old_end);
 392         set_irg_end_reg    (irg, old_end);
 393
 394         /* Not all nodes remembered in irg might be reachable
 395            from the end node.  Assure their link is set to NULL, so that
 396            we can test whether new nodes have been computed. */
 397         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
 398                 ir_node *n = get_irg_anchor(irg, i);
 399                 if (n != NULL)
 400                         set_new_node(n, NULL);
 401         }
 402         /* we use the block walk flag for removing Bads from Blocks ins. */
 403         inc_irg_block_visited(irg);
 404
 405         /* copy the graph */
 406         copy_graph(irg, copy_node_nr);
 407
 408         /* fix the anchor */
 409         old_end    = get_irg_end(irg);
 410         new_anchor = new_Anchor(irg);
 411
 412         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
 413                 ir_node *n = get_irg_anchor(irg, i);
 414                 if (n)
 415                         set_irn_n(new_anchor, i, get_new_node(n));
 416         }
 417         free_End(old_end);
 418         irg->anchor = new_anchor;
 419
 420         /* ensure the new anchor is placed in the endblock */
 421         set_nodes_block(new_anchor, get_irg_end_block(irg));
 422 }
 423
 424 /**
 425  * Copies all reachable nodes to a new obstack.  Removes bad inputs
 426  * from block nodes and the corresponding inputs from Phi nodes.
 427  * Merges single exit blocks with single entry blocks and removes
 428  * 1-input Phis.
 429  * Adds all new nodes to a new hash table for CSE.  Does not
 430  * perform CSE, so the hash table might contain common subexpressions.
 431  */
 432 void dead_node_elimination(ir_graph *irg) {
 433         ir_graph *rem;
 434 #ifdef INTERPROCEDURAL_VIEW
 435         int rem_ipview = get_interprocedural_view();
 436 #endif
 437         struct obstack *graveyard_obst = NULL;
 438         struct obstack *rebirth_obst   = NULL;
 439
 440         edges_deactivate(irg);
 441
 442         /* inform statistics that we started a dead-node elimination run */
 443         hook_dead_node_elim(irg, 1);
 444
 445         /* Remember external state of current_ir_graph. */
 446         rem = current_ir_graph;
 447         current_ir_graph = irg;
 448 #ifdef INTERPROCEDURAL_VIEW
 449         set_interprocedural_view(0);
 450 #endif
 451
 452         assert(get_irg_phase_state(irg) != phase_building);
 453
 454         /* Handle graph state */
 455         free_callee_info(irg);
 456         free_irg_outs(irg);
 457         free_trouts();
 458
 459         /* @@@ so far we loose loops when copying */
 460         free_loop_information(irg);
 461
 462         set_irg_doms_inconsistent(irg);
 463
 464         /* A quiet place, where the old obstack can rest in peace,
 465            until it will be cremated. */
 466         graveyard_obst = irg->obst;
 467
 468         /* A new obstack, where the reachable nodes will be copied to. */
 469         rebirth_obst = XMALLOC(struct obstack);
 470         irg->obst = rebirth_obst;
 471         obstack_init(irg->obst);
 472         irg->last_node_idx = 0;
 473
 474         /* We also need a new value table for CSE */
 475         del_identities(irg->value_table);
 476         irg->value_table = new_identities();
 477
 478         /* Copy the graph from the old to the new obstack */
 479         copy_graph_env(/*copy_node_nr=*/1);
 480
 481         /* Free memory from old unoptimized obstack */
 482         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
 483         xfree(graveyard_obst);            /* ... then free it.           */
 484
 485         /* inform statistics that the run is over */
 486         hook_dead_node_elim(irg, 0);
 487
 488         current_ir_graph = rem;
 489 #ifdef INTERPROCEDURAL_VIEW
 490         set_interprocedural_view(rem_ipview);
 491 #endif
 492 }
 493
 494 /**
 495  * Relink bad predecessors of a block and store the old in array to the
 496  * link field. This function is called by relink_bad_predecessors().
 497  * The array of link field starts with the block operand at position 0.
 498  * If block has bad predecessors, create a new in array without bad preds.
 499  * Otherwise let in array untouched.
 500  */
 501 static void relink_bad_block_predecessors(ir_node *n, void *env) {
 502         ir_node **new_in, *irn;
 503         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
 504         (void) env;
 505
 506         /* if link field of block is NULL, look for bad predecessors otherwise
 507            this is already done */
 508         if (is_Block(n) && get_irn_link(n) == NULL) {
 509                 /* save old predecessors in link field (position 0 is the block operand)*/
 510                 set_irn_link(n, get_irn_in(n));
 511
 512                 /* count predecessors without bad nodes */
 513                 old_irn_arity = get_irn_arity(n);
 514                 for (i = 0; i < old_irn_arity; i++)
 515                         if (!is_Bad(get_irn_n(n, i)))
 516                                 ++new_irn_arity;
 517
 518                 /* arity changing: set new predecessors without bad nodes */
 519                 if (new_irn_arity < old_irn_arity) {
 520                         /* Get new predecessor array. We do not resize the array, as we must
 521                            keep the old one to update Phis. */
 522                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
 523
 524                         /* set new predecessors in array */
 525                         new_in[0] = NULL;
 526                         new_irn_n = 1;
 527                         for (i = 0; i < old_irn_arity; i++) {
 528                                 irn = get_irn_n(n, i);
 529                                 if (!is_Bad(irn)) {
 530                                         new_in[new_irn_n] = irn;
 531                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
 532                                         ++new_irn_n;
 533                                 }
 534                         }
 535                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
 536                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
 537                         n->in = new_in;
 538                 } /* ir node has bad predecessors */
 539         } /* Block is not relinked */
 540 }
 541
 542 /**
 543  * Relinks Bad predecessors from Blocks and Phis called by walker
 544  * remove_bad_predecesors(). If n is a Block, call
 545  * relink_bad_block_redecessors(). If n is a Phi-node, call also the relinking
 546  * function of Phi's Block. If this block has bad predecessors, relink preds
 547  * of the Phi-node.
 548  */
 549 static void relink_bad_predecessors(ir_node *n, void *env) {
 550         ir_node *block, **old_in;
 551         int i, old_irn_arity, new_irn_arity;
 552
 553         /* relink bad predecessors of a block */
 554         if (is_Block(n))
 555                 relink_bad_block_predecessors(n, env);
 556
 557         /* If Phi node relink its block and its predecessors */
 558         if (is_Phi(n)) {
 559                 /* Relink predecessors of phi's block */
 560                 block = get_nodes_block(n);
 561                 if (get_irn_link(block) == NULL)
 562                         relink_bad_block_predecessors(block, env);
 563
 564                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
 565                 old_irn_arity = ARR_LEN(old_in);
 566
 567                 /* Relink Phi predecessors if count of predecessors changed */
 568                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
 569                         /* set new predecessors in array
 570                            n->in[0] remains the same block */
 571                         new_irn_arity = 1;
 572                         for(i = 1; i < old_irn_arity; i++)
 573                                 if (!is_Bad(old_in[i])) {
 574                                         n->in[new_irn_arity] = n->in[i];
 575                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
 576                                         ++new_irn_arity;
 577                                 }
 578
 579                                 ARR_SETLEN(ir_node *, n->in, new_irn_arity);
 580                                 ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
 581                 }
 582         } /* n is a Phi node */
 583 }
 584
 585 /*
 586  * Removes Bad Bad predecessors from Blocks and the corresponding
 587  * inputs to Phi nodes as in dead_node_elimination but without
 588  * copying the graph.
 589  * On walking up set the link field to NULL, on walking down call
 590  * relink_bad_predecessors() (This function stores the old in array
 591  * to the link field and sets a new in array if arity of predecessors
 592  * changes).
 593  */
 594 void remove_bad_predecessors(ir_graph *irg) {
 595         panic("Fix backedge handling first");
 596         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
 597 }
 598
 599
 600 /*
 601    __                      _  __ __
 602   (_     __    o     _    | \/  |_
 603   __)|_| | \_/ | \_/(/_   |_/\__|__
 604
 605   The following stuff implements a facility that automatically patches
 606   registered ir_node pointers to the new node when a dead node elimination occurs.
 607 */
 608
 609 struct _survive_dce_t {
 610         struct obstack obst;
 611         pmap *places;
 612         pmap *new_places;
 613         hook_entry_t dead_node_elim;
 614         hook_entry_t dead_node_elim_subst;
 615 };
 616
 617 typedef struct _survive_dce_list_t {
 618         struct _survive_dce_list_t *next;
 619         ir_node **place;
 620 } survive_dce_list_t;
 621
 622 static void dead_node_hook(void *context, ir_graph *irg, int start) {
 623         survive_dce_t *sd = context;
 624         (void) irg;
 625
 626         /* Create a new map before the dead node elimination is performed. */
 627         if (start) {
 628                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
 629         } else {
 630                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
 631                 pmap_destroy(sd->places);
 632                 sd->places     = sd->new_places;
 633                 sd->new_places = NULL;
 634         }
 635 }
 636
 637 /**
 638  * Hook called when dead node elimination replaces old by nw.
 639  */
 640 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
 641         survive_dce_t *sd = context;
 642         survive_dce_list_t *list = pmap_get(sd->places, old);
 643         (void) irg;
 644
 645         /* If the node is to be patched back, write the new address to all registered locations. */
 646         if (list) {
 647                 survive_dce_list_t *p;
 648
 649                 for (p = list; p; p = p->next)
 650                         *(p->place) = nw;
 651
 652                 pmap_insert(sd->new_places, nw, list);
 653         }
 654 }
 655
 656 /**
 657  * Make a new Survive DCE environment.
 658  */
 659 survive_dce_t *new_survive_dce(void) {
 660         survive_dce_t *res = XMALLOC(survive_dce_t);
 661         obstack_init(&res->obst);
 662         res->places     = pmap_create();
 663         res->new_places = NULL;
 664
 665         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
 666         res->dead_node_elim.context                   = res;
 667         res->dead_node_elim.next                      = NULL;
 668
 669         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
 670         res->dead_node_elim_subst.context = res;
 671         res->dead_node_elim_subst.next    = NULL;
 672
 673         register_hook(hook_dead_node_elim, &res->dead_node_elim);
 674         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
 675         return res;
 676 }
 677
 678 /**
 679  * Free a Survive DCE environment.
 680  */
 681 void free_survive_dce(survive_dce_t *sd) {
 682         obstack_free(&sd->obst, NULL);
 683         pmap_destroy(sd->places);
 684         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
 685         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
 686         xfree(sd);
 687 }
 688
 689 /**
 690  * Register a node pointer to be patched upon DCE.
 691  * When DCE occurs, the node pointer specified by @p place will be
 692  * patched to the new address of the node it is pointing to.
 693  *
 694  * @param sd    The Survive DCE environment.
 695  * @param place The address of the node pointer.
 696  */
 697 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
 698         if (*place != NULL) {
 699                 ir_node *irn      = *place;
 700                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
 701                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
 702
 703                 nw->next  = curr;
 704                 nw->place = place;
 705
 706                 pmap_insert(sd->places, irn, nw);
 707         }
 708 }
 709
 710 /*--------------------------------------------------------------------*/
 711 /*  Functionality for inlining                                         */
 712 /*--------------------------------------------------------------------*/
 713
 714 /**
 715  * Copy node for inlineing.  Updates attributes that change when
 716  * inlineing but not for dead node elimination.
 717  *
 718  * Copies the node by calling copy_node() and then updates the entity if
 719  * it's a local one.  env must be a pointer of the frame type of the
 720  * inlined procedure. The new entities must be in the link field of
 721  * the entities.
 722  */
 723 static void copy_node_inline(ir_node *n, void *env) {
 724         ir_node *nn;
 725         ir_type *frame_tp = (ir_type *)env;
 726
 727         copy_node(n, NULL);
 728         if (is_Sel(n)) {
 729                 nn = get_new_node (n);
 730                 assert(is_Sel(nn));
 731                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
 732                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
 733                 }
 734         } else if (is_Block(n)) {
 735                 nn = get_new_node (n);
 736                 nn->attr.block.irg = current_ir_graph;
 737         }
 738 }
 739
 740 /**
 741  * Copies new predecessors of old node and move constants to
 742  * the Start Block.
 743  */
 744 static void copy_preds_inline(ir_node *n, void *env) {
 745         ir_node *nn;
 746
 747         copy_preds(n, env);
 748         nn = skip_Id(get_new_node(n));
 749         if (is_irn_constlike(nn)) {
 750                 /* move Constants into the start block */
 751                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
 752
 753                 n = identify_remember(current_ir_graph->value_table, nn);
 754                 if (nn != n) {
 755                         DBG_OPT_CSE(nn, n);
 756                         exchange(nn, n);
 757                 }
 758         }
 759 }
 760
 761 /**
 762  * Walker: checks if P_value_arg_base is used.
 763  */
 764 static void find_addr(ir_node *node, void *env) {
 765         int *allow_inline = env;
 766         if (is_Sel(node)) {
 767                 ir_graph *irg = current_ir_graph;
 768                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
 769                         /* access to frame */
 770                         ir_entity *ent = get_Sel_entity(node);
 771                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
 772                                 /* access to value_type */
 773                                 *allow_inline = 0;
 774                         }
 775                 }
 776         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
 777                 /* From GCC:
 778                  * Refuse to inline alloca call unless user explicitly forced so as this
 779                  * may change program's memory overhead drastically when the function
 780                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
 781                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
 782                  *
 783                  * Sorrily this is true with our implementation also.
 784                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
 785                  * disables inlining of functions using VLA (with are completely save).
 786                  *
 787                  * 2 Solutions:
 788                  * - add a flag to the Alloc node for "real" alloca() calls
 789                  * - add a new Stack-Restore node at the end of a function using alloca()
 790                  */
 791                 *allow_inline = 0;
 792         }
 793 }
 794
 795 /**
 796  * Check if we can inline a given call.
 797  * Currently, we cannot inline two cases:
 798  * - call with compound arguments
 799  * - graphs that take the address of a parameter
 800  *
 801  * check these conditions here
 802  */
 803 static int can_inline(ir_node *call, ir_graph *called_graph) {
 804         ir_type *call_type = get_Call_type(call);
 805         int params, ress, i, res;
 806         assert(is_Method_type(call_type));
 807
 808         params = get_method_n_params(call_type);
 809         ress   = get_method_n_ress(call_type);
 810
 811         /* check parameters for compound arguments */
 812         for (i = 0; i < params; ++i) {
 813                 ir_type *p_type = get_method_param_type(call_type, i);
 814
 815                 if (is_compound_type(p_type))
 816                         return 0;
 817         }
 818
 819         /* check results for compound arguments */
 820         for (i = 0; i < ress; ++i) {
 821                 ir_type *r_type = get_method_res_type(call_type, i);
 822
 823                 if (is_compound_type(r_type))
 824                         return 0;
 825         }
 826
 827         res = 1;
 828         irg_walk_graph(called_graph, find_addr, NULL, &res);
 829
 830         return res;
 831 }
 832
 833 enum exc_mode {
 834         exc_handler    = 0, /**< There is a handler. */
 835         exc_to_end     = 1, /**< Branches to End. */
 836         exc_no_handler = 2  /**< Exception handling not represented. */
 837 };
 838
 839 /* Inlines a method at the given call site. */
 840 int inline_method(ir_node *call, ir_graph *called_graph) {
 841         ir_node             *pre_call;
 842         ir_node             *post_call, *post_bl;
 843         ir_node             *in[pn_Start_max];
 844         ir_node             *end, *end_bl, *block;
 845         ir_node             **res_pred;
 846         ir_node             **cf_pred;
 847         ir_node             **args_in;
 848         ir_node             *ret, *phi;
 849         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
 850         enum exc_mode       exc_handling;
 851         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
 852         ir_entity           *ent;
 853         ir_graph            *rem, *irg;
 854         irg_inline_property prop = get_irg_inline_property(called_graph);
 855         unsigned long       visited;
 856
 857         if (prop == irg_inline_forbidden)
 858                 return 0;
 859
 860         ent = get_irg_entity(called_graph);
 861
 862         mtp = get_entity_type(ent);
 863         ctp = get_Call_type(call);
 864         n_params = get_method_n_params(mtp);
 865         n_res    = get_method_n_ress(mtp);
 866         if (n_params > get_method_n_params(ctp)) {
 867                 /* this is a bad feature of C: without a prototype, we can
 868                  * call a function with less parameters than needed. Currently
 869                  * we don't support this, although we could use Unknown than. */
 870                 return 0;
 871         }
 872         if (n_res != get_method_n_ress(ctp)) {
 873                 return 0;
 874         }
 875
 876         /* Argh, compiling C has some bad consequences:
 877          * It is implementation dependent what happens in that case.
 878          * We support inlining, if the bitsize of the types matches AND
 879          * the same arithmetic is used. */
 880         for (i = n_params - 1; i >= 0; --i) {
 881                 ir_type *param_tp = get_method_param_type(mtp, i);
 882                 ir_type *arg_tp   = get_method_param_type(ctp, i);
 883
 884                 if (param_tp != arg_tp) {
 885                         ir_mode *pmode = get_type_mode(param_tp);
 886                         ir_mode *amode = get_type_mode(arg_tp);
 887
 888                         if (pmode == NULL || amode == NULL)
 889                                 return 0;
 890                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
 891                                 return 0;
 892                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
 893                                 return 0;
 894                         /* otherwise we can simply "reinterpret" the bits */
 895                 }
 896         }
 897         for (i = n_res - 1; i >= 0; --i) {
 898                 ir_type *decl_res_tp = get_method_res_type(mtp, i);
 899                 ir_type *used_res_tp = get_method_res_type(ctp, i);
 900
 901                 if (decl_res_tp != used_res_tp) {
 902                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
 903                         ir_mode *used_mode = get_type_mode(used_res_tp);
 904                         if (decl_mode == NULL || used_mode == NULL)
 905                                 return 0;
 906                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
 907                                 return 0;
 908                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
 909                                 return 0;
 910                         /* otherwise we can "reinterpret" the bits */
 911                 }
 912         }
 913
 914         irg = get_irn_irg(call);
 915
 916         /*
 917          * We cannot inline a recursive call. The graph must be copied before
 918          * the call the inline_method() using create_irg_copy().
 919          */
 920         if (called_graph == irg)
 921                 return 0;
 922
 923         /*
 924          * currently, we cannot inline two cases:
 925          * - call with compound arguments
 926          * - graphs that take the address of a parameter
 927          */
 928         if (! can_inline(call, called_graph))
 929                 return 0;
 930
 931         rem = current_ir_graph;
 932         current_ir_graph = irg;
 933
 934         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
 935
 936         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
 937         rem_opt = get_opt_optimize();
 938         set_optimize(0);
 939
 940         /* Handle graph state */
 941         assert(get_irg_phase_state(irg) != phase_building);
 942         assert(get_irg_pinned(irg) == op_pin_state_pinned);
 943         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
 944         set_irg_outs_inconsistent(irg);
 945         set_irg_extblk_inconsistent(irg);
 946         set_irg_doms_inconsistent(irg);
 947         set_irg_loopinfo_inconsistent(irg);
 948         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
 949         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
 950
 951         /* -- Check preconditions -- */
 952         assert(is_Call(call));
 953
 954         /* here we know we WILL inline, so inform the statistics */
 955         hook_inline(call, called_graph);
 956
 957         /* -- Decide how to handle exception control flow: Is there a handler
 958            for the Call node, or do we branch directly to End on an exception?
 959            exc_handling:
 960            0 There is a handler.
 961            1 Branches to End.
 962            2 Exception handling not represented in Firm. -- */
 963         {
 964                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
 965                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
 966                         long proj_nr = get_Proj_proj(proj);
 967                         if (proj_nr == pn_Call_X_except) Xproj = proj;
 968                         if (proj_nr == pn_Call_M_except) Mproj = proj;
 969                 }
 970                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
 971                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
 972                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
 973         }
 974
 975         /* create the argument tuple */
 976         NEW_ARR_A(ir_type *, args_in, n_params);
 977
 978         block = get_nodes_block(call);
 979         for (i = n_params - 1; i >= 0; --i) {
 980                 ir_node *arg      = get_Call_param(call, i);
 981                 ir_type *param_tp = get_method_param_type(mtp, i);
 982                 ir_mode *mode     = get_type_mode(param_tp);
 983
 984                 if (mode != get_irn_mode(arg)) {
 985                         arg = new_r_Conv(irg, block, arg, mode);
 986                 }
 987                 args_in[i] = arg;
 988         }
 989
 990         /* --
 991            the procedure and later replaces the Start node of the called graph.
 992            Post_call is the old Call node and collects the results of the called
 993            graph. Both will end up being a tuple.  -- */
 994         post_bl = get_nodes_block(call);
 995         set_irg_current_block(irg, post_bl);
 996         /* XxMxPxPxPxT of Start + parameter of Call */
 997         in[pn_Start_X_initial_exec]   = new_Jmp();
 998         in[pn_Start_M]                = get_Call_mem(call);
 999         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
1000         in[pn_Start_P_tls]            = get_irg_tls(irg);
1001         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
1002         pre_call = new_Tuple(pn_Start_max, in);
1003         post_call = call;
1004
1005         /* --
1006            The new block gets the ins of the old block, pre_call and all its
1007            predecessors and all Phi nodes. -- */
1008         part_block(pre_call);
1009
1010         /* -- Prepare state for dead node elimination -- */
1011         /* Visited flags in calling irg must be >= flag in called irg.
1012            Else walker and arity computation will not work. */
1013         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1014                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1015         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1016                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1017         visited = get_irg_visited(irg);
1018
1019         /* Set pre_call as new Start node in link field of the start node of
1020            calling graph and pre_calls block as new block for the start block
1021            of calling graph.
1022            Further mark these nodes so that they are not visited by the
1023            copying. */
1024         set_irn_link(get_irg_start(called_graph), pre_call);
1025         set_irn_visited(get_irg_start(called_graph), visited);
1026         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1027         set_irn_visited(get_irg_start_block(called_graph), visited);
1028
1029         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1030         set_irn_visited(get_irg_bad(called_graph), visited);
1031
1032         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1033         set_irn_visited(get_irg_no_mem(called_graph), visited);
1034
1035         /* Initialize for compaction of in arrays */
1036         inc_irg_block_visited(irg);
1037
1038         /* -- Replicate local entities of the called_graph -- */
1039         /* copy the entities. */
1040         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
1041         called_frame = get_irg_frame_type(called_graph);
1042         curr_frame   = get_irg_frame_type(irg);
1043         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1044                 ir_entity *new_ent, *old_ent;
1045                 old_ent = get_class_member(called_frame, i);
1046                 new_ent = copy_entity_own(old_ent, curr_frame);
1047                 set_entity_link(old_ent, new_ent);
1048         }
1049
1050         /* visited is > than that of called graph.  With this trick visited will
1051            remain unchanged so that an outer walker, e.g., searching the call nodes
1052             to inline, calling this inline will not visit the inlined nodes. */
1053         set_irg_visited(irg, get_irg_visited(irg)-1);
1054
1055         /* -- Performing dead node elimination inlines the graph -- */
1056         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1057            entities. */
1058         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1059                  get_irg_frame_type(called_graph));
1060
1061         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
1062
1063         /* Repair called_graph */
1064         set_irg_visited(called_graph, get_irg_visited(irg));
1065         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1066         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1067
1068         /* -- Merge the end of the inlined procedure with the call site -- */
1069         /* We will turn the old Call node into a Tuple with the following
1070            predecessors:
1071            -1:  Block of Tuple.
1072            0: Phi of all Memories of Return statements.
1073            1: Jmp from new Block that merges the control flow from all exception
1074            predecessors of the old end block.
1075            2: Tuple of all arguments.
1076            3: Phi of Exception memories.
1077            In case the old Call directly branches to End on an exception we don't
1078            need the block merging all exceptions nor the Phi of the exception
1079            memories.
1080         */
1081
1082         /* -- Precompute some values -- */
1083         end_bl = get_new_node(get_irg_end_block(called_graph));
1084         end = get_new_node(get_irg_end(called_graph));
1085         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1086         n_res = get_method_n_ress(get_Call_type(call));
1087
1088         res_pred = XMALLOCN(ir_node*, n_res);
1089         cf_pred  = XMALLOCN(ir_node*, arity);
1090
1091         set_irg_current_block(irg, post_bl); /* just to make sure */
1092
1093         /* -- archive keepalives -- */
1094         irn_arity = get_irn_arity(end);
1095         for (i = 0; i < irn_arity; i++) {
1096                 ir_node *ka = get_End_keepalive(end, i);
1097                 if (! is_Bad(ka))
1098                         add_End_keepalive(get_irg_end(irg), ka);
1099         }
1100
1101         /* The new end node will die.  We need not free as the in array is on the obstack:
1102            copy_node() only generated 'D' arrays. */
1103
1104         /* -- Replace Return nodes by Jump nodes. -- */
1105         n_ret = 0;
1106         for (i = 0; i < arity; i++) {
1107                 ir_node *ret;
1108                 ret = get_Block_cfgpred(end_bl, i);
1109                 if (is_Return(ret)) {
1110                         cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
1111                         n_ret++;
1112                 }
1113         }
1114         set_irn_in(post_bl, n_ret, cf_pred);
1115
1116         /* -- Build a Tuple for all results of the method.
1117            Add Phi node if there was more than one Return.  -- */
1118         turn_into_tuple(post_call, pn_Call_max);
1119         /* First the Memory-Phi */
1120         n_ret = 0;
1121         for (i = 0; i < arity; i++) {
1122                 ret = get_Block_cfgpred(end_bl, i);
1123                 if (is_Return(ret)) {
1124                         cf_pred[n_ret] = get_Return_mem(ret);
1125                         n_ret++;
1126                 }
1127         }
1128         phi = new_Phi(n_ret, cf_pred, mode_M);
1129         set_Tuple_pred(call, pn_Call_M_regular, phi);
1130         /* Conserve Phi-list for further inlinings -- but might be optimized */
1131         if (get_nodes_block(phi) == post_bl) {
1132                 set_irn_link(phi, get_irn_link(post_bl));
1133                 set_irn_link(post_bl, phi);
1134         }
1135         /* Now the real results */
1136         if (n_res > 0) {
1137                 for (j = 0; j < n_res; j++) {
1138                         ir_type *res_type = get_method_res_type(ctp, j);
1139                         ir_mode *res_mode = get_type_mode(res_type);
1140                         n_ret = 0;
1141                         for (i = 0; i < arity; i++) {
1142                                 ret = get_Block_cfgpred(end_bl, i);
1143                                 if (is_Return(ret)) {
1144                                         ir_node *res = get_Return_res(ret, j);
1145                                         if (get_irn_mode(res) != res_mode) {
1146                                                 ir_node *block = get_nodes_block(res);
1147                                                 res = new_r_Conv(irg, block, res, res_mode);
1148                                         }
1149                                         cf_pred[n_ret] = res;
1150                                         n_ret++;
1151                                 }
1152                         }
1153                         if (n_ret > 0)
1154                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1155                         else
1156                                 phi = new_Bad();
1157                         res_pred[j] = phi;
1158                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1159                         if (get_nodes_block(phi) == post_bl) {
1160                                 set_Phi_next(phi, get_Block_phis(post_bl));
1161                                 set_Block_phis(post_bl, phi);
1162                         }
1163                 }
1164                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1165         } else {
1166                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1167         }
1168         /* handle the regular call */
1169         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1170
1171         /* For now, we cannot inline calls with value_base */
1172         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1173
1174         /* Finally the exception control flow.
1175            We have two (three) possible situations:
1176            First if the Call branches to an exception handler: We need to add a Phi node to
1177            collect the memory containing the exception objects.  Further we need
1178            to add another block to get a correct representation of this Phi.  To
1179            this block we add a Jmp that resolves into the X output of the Call
1180            when the Call is turned into a tuple.
1181            Second the Call branches to End, the exception is not handled.  Just
1182            add all inlined exception branches to the End node.
1183            Third: there is no Exception edge at all. Handle as case two. */
1184         if (exc_handling == exc_handler) {
1185                 n_exc = 0;
1186                 for (i = 0; i < arity; i++) {
1187                         ir_node *ret, *irn;
1188                         ret = get_Block_cfgpred(end_bl, i);
1189                         irn = skip_Proj(ret);
1190                         if (is_fragile_op(irn) || is_Raise(irn)) {
1191                                 cf_pred[n_exc] = ret;
1192                                 ++n_exc;
1193                         }
1194                 }
1195                 if (n_exc > 0) {
1196                         ir_node *block = new_Block(n_exc, cf_pred);
1197                         set_cur_block(block);
1198
1199                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1200                         /* The Phi for the memories with the exception objects */
1201                         n_exc = 0;
1202                         for (i = 0; i < arity; i++) {
1203                                 ir_node *ret;
1204                                 ret = skip_Proj(get_Block_cfgpred(end_bl, i));
1205                                 if (is_Call(ret)) {
1206                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
1207                                         n_exc++;
1208                                 } else if (is_fragile_op(ret)) {
1209                                         /* We rely that all cfops have the memory output at the same position. */
1210                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
1211                                         n_exc++;
1212                                 } else if (is_Raise(ret)) {
1213                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
1214                                         n_exc++;
1215                                 }
1216                         }
1217                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1218                 } else {
1219                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1220                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1221                 }
1222         } else {
1223                 ir_node *main_end_bl;
1224                 int main_end_bl_arity;
1225                 ir_node **end_preds;
1226
1227                 /* assert(exc_handling == 1 || no exceptions. ) */
1228                 n_exc = 0;
1229                 for (i = 0; i < arity; i++) {
1230                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1231                         ir_node *irn = skip_Proj(ret);
1232
1233                         if (is_fragile_op(irn) || is_Raise(irn)) {
1234                                 cf_pred[n_exc] = ret;
1235                                 n_exc++;
1236                         }
1237                 }
1238                 main_end_bl       = get_irg_end_block(irg);
1239                 main_end_bl_arity = get_irn_arity(main_end_bl);
1240                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1241
1242                 for (i = 0; i < main_end_bl_arity; ++i)
1243                         end_preds[i] = get_irn_n(main_end_bl, i);
1244                 for (i = 0; i < n_exc; ++i)
1245                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1246                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1247                 set_Tuple_pred(call, pn_Call_X_except,  new_Bad());
1248                 set_Tuple_pred(call, pn_Call_M_except,  new_Bad());
1249                 free(end_preds);
1250         }
1251         free(res_pred);
1252         free(cf_pred);
1253
1254         /* --  Turn CSE back on. -- */
1255         set_optimize(rem_opt);
1256         current_ir_graph = rem;
1257
1258         return 1;
1259 }
1260
1261 /********************************************************************/
1262 /* Apply inlining to small methods.                                 */
1263 /********************************************************************/
1264
1265 static struct obstack  temp_obst;
1266
1267 /** Represents a possible inlinable call in a graph. */
1268 typedef struct _call_entry {
1269         ir_node    *call;       /**< The Call node. */
1270         ir_graph   *callee;     /**< The callee IR-graph. */
1271         list_head  list;        /**< List head for linking the next one. */
1272         int        loop_depth;  /**< The loop depth of this call. */
1273         int        benefice;    /**< The calculated benefice of this call. */
1274         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1275         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1276 } call_entry;
1277
1278 /**
1279  * environment for inlining small irgs
1280  */
1281 typedef struct _inline_env_t {
1282         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1283         list_head      calls; /**< The call entry list. */
1284 } inline_env_t;
1285
1286 /**
1287  * Returns the irg called from a Call node. If the irg is not
1288  * known, NULL is returned.
1289  *
1290  * @param call  the call node
1291  */
1292 static ir_graph *get_call_called_irg(ir_node *call) {
1293         ir_node *addr;
1294
1295         addr = get_Call_ptr(call);
1296         if (is_Global(addr)) {
1297                 ir_entity *ent = get_Global_entity(addr);
1298                 return get_entity_irg(ent);
1299         }
1300
1301         return NULL;
1302 }
1303
1304 /**
1305  * Walker: Collect all calls to known graphs inside a graph.
1306  */
1307 static void collect_calls(ir_node *call, void *env) {
1308         (void) env;
1309         if (is_Call(call)) {
1310                 ir_graph *called_irg = get_call_called_irg(call);
1311
1312                 if (called_irg != NULL) {
1313                         /* The Call node calls a locally defined method.  Remember to inline. */
1314                         inline_env_t *ienv  = env;
1315                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1316                         entry->call       = call;
1317                         entry->callee     = called_irg;
1318                         entry->loop_depth = 0;
1319                         entry->benefice   = 0;
1320                         entry->local_adr  = 0;
1321                         entry->all_const  = 0;
1322
1323                         list_add_tail(&entry->list, &ienv->calls);
1324                 }
1325         }
1326 }
1327
1328 /**
1329  * Inlines all small methods at call sites where the called address comes
1330  * from a Const node that references the entity representing the called
1331  * method.
1332  * The size argument is a rough measure for the code size of the method:
1333  * Methods where the obstack containing the firm graph is smaller than
1334  * size are inlined.
1335  */
1336 void inline_small_irgs(ir_graph *irg, int size) {
1337         ir_graph *rem = current_ir_graph;
1338         inline_env_t env;
1339         call_entry *entry;
1340
1341         current_ir_graph = irg;
1342         /* Handle graph state */
1343         assert(get_irg_phase_state(irg) != phase_building);
1344         free_callee_info(irg);
1345
1346         /* Find Call nodes to inline.
1347            (We can not inline during a walk of the graph, as inlining the same
1348            method several times changes the visited flag of the walked graph:
1349            after the first inlining visited of the callee equals visited of
1350            the caller.  With the next inlining both are increased.) */
1351         obstack_init(&env.obst);
1352         INIT_LIST_HEAD(&env.calls);
1353         irg_walk_graph(irg, NULL, collect_calls, &env);
1354
1355         if (! list_empty(&env.calls)) {
1356                 /* There are calls to inline */
1357                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1358                 collect_phiprojs(irg);
1359
1360                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1361                         ir_graph            *callee = entry->callee;
1362                         irg_inline_property prop    = get_irg_inline_property(callee);
1363
1364                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1365                                 /* do not inline forbidden / weak graphs */
1366                                 continue;
1367                         }
1368
1369                         if (prop >= irg_inline_forced ||
1370                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1371                                 inline_method(entry->call, callee);
1372                         }
1373                 }
1374                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1375         }
1376         obstack_free(&env.obst, NULL);
1377         current_ir_graph = rem;
1378 }
1379
1380 /**
1381  * Environment for inlining irgs.
1382  */
1383 typedef struct {
1384         list_head calls;             /**< List of of all call nodes in this graph. */
1385         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1386         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1387         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1388         unsigned  n_nodes_orig;      /**< for statistics */
1389         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1390         unsigned  n_call_nodes_orig; /**< for statistics */
1391         unsigned  n_callers;         /**< Number of known graphs that call this graphs. */
1392         unsigned  n_callers_orig;    /**< for statistics */
1393         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1394         unsigned  local_vars:1;      /**< Set, if an inlined function got the address of a local variable. */
1395         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1396 } inline_irg_env;
1397
1398 /**
1399  * Allocate a new environment for inlining.
1400  */
1401 static inline_irg_env *alloc_inline_irg_env(void) {
1402         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1403         INIT_LIST_HEAD(&env->calls);
1404         env->local_weights     = NULL;
1405         env->n_nodes           = -2; /* do not count count Start, End */
1406         env->n_blocks          = -2; /* do not count count Start, End Block */
1407         env->n_nodes_orig      = -2; /* do not count Start, End */
1408         env->n_call_nodes      = 0;
1409         env->n_call_nodes_orig = 0;
1410         env->n_callers         = 0;
1411         env->n_callers_orig    = 0;
1412         env->got_inline        = 0;
1413         env->local_vars        = 0;
1414         env->recursive         = 0;
1415         return env;
1416 }
1417
1418 typedef struct walker_env {
1419         inline_irg_env *x;     /**< the inline environment */
1420         char ignore_runtime;   /**< the ignore runtime flag */
1421         char ignore_callers;   /**< if set, do change callers data */
1422 } wenv_t;
1423
1424 /**
1425  * post-walker: collect all calls in the inline-environment
1426  * of a graph and sum some statistics.
1427  */
1428 static void collect_calls2(ir_node *call, void *ctx) {
1429         wenv_t         *env = ctx;
1430         inline_irg_env *x = env->x;
1431         ir_opcode      code = get_irn_opcode(call);
1432         ir_graph       *callee;
1433         call_entry     *entry;
1434
1435         /* count meaningful nodes in irg */
1436         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1437                 if (code != iro_Block) {
1438                         ++x->n_nodes;
1439                         ++x->n_nodes_orig;
1440                 } else {
1441                         ++x->n_blocks;
1442                 }
1443         }
1444
1445         if (code != iro_Call) return;
1446
1447         /* check, if it's a runtime call */
1448         if (env->ignore_runtime) {
1449                 ir_node *symc = get_Call_ptr(call);
1450
1451                 if (is_Global(symc)) {
1452                         ir_entity *ent = get_Global_entity(symc);
1453
1454                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1455                                 return;
1456                 }
1457         }
1458
1459         /* collect all call nodes */
1460         ++x->n_call_nodes;
1461         ++x->n_call_nodes_orig;
1462
1463         callee = get_call_called_irg(call);
1464         if (callee != NULL) {
1465                 if (! env->ignore_callers) {
1466                         inline_irg_env *callee_env = get_irg_link(callee);
1467                         /* count all static callers */
1468                         ++callee_env->n_callers;
1469                         ++callee_env->n_callers_orig;
1470                 }
1471                 if (callee == current_ir_graph)
1472                         x->recursive = 1;
1473
1474                 /* link it in the list of possible inlinable entries */
1475                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1476                 entry->call       = call;
1477                 entry->callee     = callee;
1478                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1479                 entry->benefice   = 0;
1480                 entry->local_adr  = 0;
1481                 entry->all_const  = 0;
1482
1483                 list_add_tail(&entry->list, &x->calls);
1484         }
1485 }
1486
1487 /**
1488  * Returns TRUE if the number of callers is 0 in the irg's environment,
1489  * hence this irg is a leave.
1490  */
1491 inline static int is_leave(ir_graph *irg) {
1492         inline_irg_env *env = get_irg_link(irg);
1493         return env->n_call_nodes == 0;
1494 }
1495
1496 /**
1497  * Returns TRUE if the number of nodes in the callee is
1498  * smaller then size in the irg's environment.
1499  */
1500 inline static int is_smaller(ir_graph *callee, unsigned size) {
1501         inline_irg_env *env = get_irg_link(callee);
1502         return env->n_nodes < size;
1503 }
1504
1505 /**
1506  * Duplicate a call entry.
1507  *
1508  * @param entry     the original entry to duplicate
1509  * @param new_call  the new call node
1510  * @param loop_depth_delta
1511  *                  delta value for the loop depth
1512  */
1513 static call_entry *duplicate_call_entry(const call_entry *entry,
1514                                         ir_node *new_call, int loop_depth_delta) {
1515         call_entry *nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1516         nentry->call       = new_call;
1517         nentry->callee     = entry->callee;
1518         nentry->benefice   = entry->benefice;
1519         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1520         nentry->local_adr  = entry->local_adr;
1521         nentry->all_const  = entry->all_const;
1522
1523         return nentry;
1524 }
1525
1526 /**
1527  * Append all call nodes of the source environment to the nodes of in the destination
1528  * environment.
1529  *
1530  * @param dst         destination environment
1531  * @param src         source environment
1532  * @param loop_depth  the loop depth of the call that is replaced by the src list
1533  */
1534 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth) {
1535         call_entry *entry, *nentry;
1536
1537         /* Note that the src list points to Call nodes in the inlined graph, but
1538            we need Call nodes in our graph. Luckily the inliner leaves this information
1539            in the link field. */
1540         list_for_each_entry(call_entry, entry, &src->calls, list) {
1541                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1542                 list_add_tail(&nentry->list, &dst->calls);
1543         }
1544         dst->n_call_nodes += src->n_call_nodes;
1545         dst->n_nodes      += src->n_nodes;
1546 }
1547
1548 /*
1549  * Inlines small leave methods at call sites where the called address comes
1550  * from a Const node that references the entity representing the called
1551  * method.
1552  * The size argument is a rough measure for the code size of the method:
1553  * Methods where the obstack containing the firm graph is smaller than
1554  * size are inlined.
1555  */
1556 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1557                             unsigned size, int ignore_runtime)
1558 {
1559         inline_irg_env   *env;
1560         ir_graph         *irg;
1561         int              i, n_irgs;
1562         ir_graph         *rem;
1563         int              did_inline;
1564         wenv_t           wenv;
1565         call_entry       *entry, *next;
1566         const call_entry *centry;
1567         pmap             *copied_graphs;
1568         pmap_entry       *pm_entry;
1569
1570         rem = current_ir_graph;
1571         obstack_init(&temp_obst);
1572
1573         /* a map for the copied graphs, used to inline recursive calls */
1574         copied_graphs = pmap_create();
1575
1576         /* extend all irgs by a temporary data structure for inlining. */
1577         n_irgs = get_irp_n_irgs();
1578         for (i = 0; i < n_irgs; ++i)
1579                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1580
1581         /* Pre-compute information in temporary data structure. */
1582         wenv.ignore_runtime = ignore_runtime;
1583         wenv.ignore_callers = 0;
1584         for (i = 0; i < n_irgs; ++i) {
1585                 ir_graph *irg = get_irp_irg(i);
1586
1587                 assert(get_irg_phase_state(irg) != phase_building);
1588                 free_callee_info(irg);
1589
1590                 assure_cf_loop(irg);
1591                 wenv.x = get_irg_link(irg);
1592                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1593         }
1594
1595         /* -- and now inline. -- */
1596
1597         /* Inline leaves recursively -- we might construct new leaves. */
1598         do {
1599                 did_inline = 0;
1600
1601                 for (i = 0; i < n_irgs; ++i) {
1602                         ir_node *call;
1603                         int phiproj_computed = 0;
1604
1605                         current_ir_graph = get_irp_irg(i);
1606                         env              = get_irg_link(current_ir_graph);
1607
1608                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1609                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1610                                 ir_graph            *callee;
1611                                 irg_inline_property  prop;
1612
1613                                 if (env->n_nodes > maxsize)
1614                                         break;
1615
1616                                 call   = entry->call;
1617                                 callee = entry->callee;
1618
1619                                 prop = get_irg_inline_property(callee);
1620                                 if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1621                                         /* do not inline forbidden / weak graphs */
1622                                         continue;
1623                                 }
1624
1625                                 if (is_leave(callee) && (
1626                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1627                                         if (!phiproj_computed) {
1628                                                 phiproj_computed = 1;
1629                                                 collect_phiprojs(current_ir_graph);
1630                                         }
1631                                         did_inline = inline_method(call, callee);
1632
1633                                         if (did_inline) {
1634                                                 inline_irg_env *callee_env = get_irg_link(callee);
1635
1636                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1637                                                 phiproj_computed = 0;
1638
1639                                                 /* Do some statistics */
1640                                                 env->got_inline = 1;
1641                                                 --env->n_call_nodes;
1642                                                 env->n_nodes += callee_env->n_nodes;
1643                                                 --callee_env->n_callers;
1644
1645                                                 /* remove this call from the list */
1646                                                 list_del(&entry->list);
1647                                                 continue;
1648                                         }
1649                                 }
1650                         }
1651                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1652                 }
1653         } while (did_inline);
1654
1655         /* inline other small functions. */
1656         for (i = 0; i < n_irgs; ++i) {
1657                 ir_node *call;
1658                 int phiproj_computed = 0;
1659
1660                 current_ir_graph = get_irp_irg(i);
1661                 env              = get_irg_link(current_ir_graph);
1662
1663                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1664
1665                 /* note that the list of possible calls is updated during the process */
1666                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1667                         irg_inline_property prop;
1668                         ir_graph            *callee;
1669                         pmap_entry          *e;
1670
1671                         call   = entry->call;
1672                         callee = entry->callee;
1673
1674                         prop = get_irg_inline_property(callee);
1675                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1676                                 /* do not inline forbidden / weak graphs */
1677                                 continue;
1678                         }
1679
1680                         e = pmap_find(copied_graphs, callee);
1681                         if (e != NULL) {
1682                                 /*
1683                                  * Remap callee if we have a copy.
1684                                  * FIXME: Should we do this only for recursive Calls ?
1685                                  */
1686                                 callee = e->value;
1687                         }
1688
1689                         if (prop >= irg_inline_forced ||
1690                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1691                                 if (current_ir_graph == callee) {
1692                                         /*
1693                                          * Recursive call: we cannot directly inline because we cannot walk
1694                                          * the graph and change it. So we have to make a copy of the graph
1695                                          * first.
1696                                          */
1697
1698                                         inline_irg_env *callee_env;
1699                                         ir_graph       *copy;
1700
1701                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1702
1703                                         /*
1704                                          * No copy yet, create one.
1705                                          * Note that recursive methods are never leaves, so it is sufficient
1706                                          * to test this condition here.
1707                                          */
1708                                         copy = create_irg_copy(callee);
1709
1710                                         /* create_irg_copy() destroys the Proj links, recompute them */
1711                                         phiproj_computed = 0;
1712
1713                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1714
1715                                         /* allocate new environment */
1716                                         callee_env = alloc_inline_irg_env();
1717                                         set_irg_link(copy, callee_env);
1718
1719                                         assure_cf_loop(copy);
1720                                         wenv.x              = callee_env;
1721                                         wenv.ignore_callers = 1;
1722                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1723
1724                                         /*
1725                                          * Enter the entity of the original graph. This is needed
1726                                          * for inline_method(). However, note that ent->irg still points
1727                                          * to callee, NOT to copy.
1728                                          */
1729                                         set_irg_entity(copy, get_irg_entity(callee));
1730
1731                                         pmap_insert(copied_graphs, callee, copy);
1732                                         callee = copy;
1733
1734                                         /* we have only one caller: the original graph */
1735                                         callee_env->n_callers      = 1;
1736                                         callee_env->n_callers_orig = 1;
1737                                 }
1738                                 if (! phiproj_computed) {
1739                                         phiproj_computed = 1;
1740                                         collect_phiprojs(current_ir_graph);
1741                                 }
1742                                 did_inline = inline_method(call, callee);
1743                                 if (did_inline) {
1744                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1745
1746                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1747                                         phiproj_computed = 0;
1748
1749                                         /* callee was inline. Append it's call list. */
1750                                         env->got_inline = 1;
1751                                         --env->n_call_nodes;
1752                                         append_call_list(env, callee_env, entry->loop_depth);
1753                                         --callee_env->n_callers;
1754
1755                                         /* after we have inlined callee, all called methods inside callee
1756                                            are now called once more */
1757                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1758                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1759                                                 ++penv->n_callers;
1760                                         }
1761
1762                                         /* remove this call from the list */
1763                                         list_del(&entry->list);
1764                                         continue;
1765                                 }
1766                         }
1767                 }
1768                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1769         }
1770
1771         for (i = 0; i < n_irgs; ++i) {
1772                 irg = get_irp_irg(i);
1773                 env = get_irg_link(irg);
1774
1775                 if (env->got_inline) {
1776                         optimize_graph_df(irg);
1777                         optimize_cf(irg);
1778                 }
1779                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1780                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1781                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1782                         env->n_callers_orig, env->n_callers,
1783                         get_entity_name(get_irg_entity(irg))));
1784                 }
1785         }
1786
1787         /* kill the copied graphs: we don't need them anymore */
1788         foreach_pmap(copied_graphs, pm_entry) {
1789                 ir_graph *copy = pm_entry->value;
1790
1791                 /* reset the entity, otherwise it will be deleted in the next step ... */
1792                 set_irg_entity(copy, NULL);
1793                 free_ir_graph(copy);
1794         }
1795         pmap_destroy(copied_graphs);
1796
1797         obstack_free(&temp_obst, NULL);
1798         current_ir_graph = rem;
1799 }
1800
1801 /**
1802  * Calculate the parameter weights for transmitting the address of a local variable.
1803  */
1804 static unsigned calc_method_local_weight(ir_node *arg) {
1805         int      i, j, k;
1806         unsigned v, weight = 0;
1807
1808         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1809                 ir_node *succ = get_irn_out(arg, i);
1810
1811                 switch (get_irn_opcode(succ)) {
1812                 case iro_Load:
1813                 case iro_Store:
1814                         /* Loads and Store can be removed */
1815                         weight += 3;
1816                         break;
1817                 case iro_Sel:
1818                         /* check if all args are constant */
1819                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1820                                 ir_node *idx = get_Sel_index(succ, j);
1821                                 if (! is_Const(idx))
1822                                         return 0;
1823                         }
1824                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1825                            some unsupported node. */
1826                         v = calc_method_local_weight(succ);
1827                         if (v == 0)
1828                                 return 0;
1829                         /* we can kill one Sel with constant indexes, this is cheap */
1830                         weight += v + 1;
1831                         break;
1832                 case iro_Id:
1833                         /* when looking backward we might find Id nodes */
1834                         weight += calc_method_local_weight(succ);
1835                         break;
1836                 case iro_Tuple:
1837                         /* unoptimized tuple */
1838                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1839                                 ir_node *pred = get_Tuple_pred(succ, j);
1840                                 if (pred == arg) {
1841                                         /* look for Proj(j) */
1842                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1843                                                 ir_node *succ_succ = get_irn_out(succ, k);
1844                                                 if (is_Proj(succ_succ)) {
1845                                                         if (get_Proj_proj(succ_succ) == j) {
1846                                                                 /* found */
1847                                                                 weight += calc_method_local_weight(succ_succ);
1848                                                         }
1849                                                 } else {
1850                                                         /* this should NOT happen */
1851                                                         return 0;
1852                                                 }
1853                                         }
1854                                 }
1855                         }
1856                         break;
1857                 default:
1858                         /* any other node: unsupported yet or bad. */
1859                         return 0;
1860                 }
1861         }
1862         return weight;
1863 }
1864
1865 /**
1866  * Calculate the parameter weights for transmitting the address of a local variable.
1867  */
1868 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1869         ir_entity *ent = get_irg_entity(irg);
1870         ir_type  *mtp;
1871         int      nparams, i, proj_nr;
1872         ir_node  *irg_args, *arg;
1873
1874         mtp      = get_entity_type(ent);
1875         nparams  = get_method_n_params(mtp);
1876
1877         /* allocate a new array. currently used as 'analysed' flag */
1878         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1879
1880         /* If the method haven't parameters we have nothing to do. */
1881         if (nparams <= 0)
1882                 return;
1883
1884         assure_irg_outs(irg);
1885         irg_args = get_irg_args(irg);
1886         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1887                 arg     = get_irn_out(irg_args, i);
1888                 proj_nr = get_Proj_proj(arg);
1889                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1890         }
1891 }
1892
1893 /**
1894  * Calculate the benefice for transmitting an local variable address.
1895  * After inlining, the local variable might be transformed into a
1896  * SSA variable by scalar_replacement().
1897  */
1898 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1899         inline_irg_env *env = get_irg_link(callee);
1900
1901         if (env->local_weights != NULL) {
1902                 if (pos < ARR_LEN(env->local_weights))
1903                         return env->local_weights[pos];
1904                 return 0;
1905         }
1906
1907         analyze_irg_local_weights(env, callee);
1908
1909         if (pos < ARR_LEN(env->local_weights))
1910                 return env->local_weights[pos];
1911         return 0;
1912 }
1913
1914 /**
1915  * Calculate a benefice value for inlining the given call.
1916  *
1917  * @param call       the call node we have to inspect
1918  * @param callee     the called graph
1919  */
1920 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1921 {
1922         ir_node   *call = entry->call;
1923         ir_entity *ent  = get_irg_entity(callee);
1924         ir_node   *frame_ptr;
1925         ir_type   *mtp;
1926         int       weight = 0;
1927         int       i, n_params, all_const;
1928         unsigned  cc, v;
1929         irg_inline_property prop;
1930
1931         inline_irg_env *callee_env;
1932
1933         prop = get_irg_inline_property(callee);
1934         if (prop == irg_inline_forbidden) {
1935                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1936                     call, callee));
1937                 return entry->benefice = INT_MIN;
1938         }
1939
1940         if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
1941                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1942                     call, callee));
1943                 return entry->benefice = INT_MIN;
1944         }
1945
1946         /* costs for every passed parameter */
1947         n_params = get_Call_n_params(call);
1948         mtp      = get_entity_type(ent);
1949         cc       = get_method_calling_convention(mtp);
1950         if (cc & cc_reg_param) {
1951                 /* register parameter, smaller costs for register parameters */
1952                 int max_regs = cc & ~cc_bits;
1953
1954                 if (max_regs < n_params)
1955                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1956                 else
1957                         weight += n_params * 2;
1958         } else {
1959                 /* parameters are passed an stack */
1960                 weight += 5 * n_params;
1961         }
1962
1963         /* constant parameters improve the benefice */
1964         frame_ptr = get_irg_frame(current_ir_graph);
1965         all_const = 1;
1966         for (i = 0; i < n_params; ++i) {
1967                 ir_node *param = get_Call_param(call, i);
1968
1969                 if (is_Const(param)) {
1970                         weight += get_method_param_weight(ent, i);
1971                 } else {
1972                         all_const = 0;
1973                         if (is_SymConst(param))
1974                                 weight += get_method_param_weight(ent, i);
1975                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1976                                 /*
1977                                  * An address of a local variable is transmitted. After
1978                                  * inlining, scalar_replacement might be able to remove the
1979                                  * local variable, so honor this.
1980                                  */
1981                                 v = get_method_local_adress_weight(callee, i);
1982                                 weight += v;
1983                                 if (v > 0)
1984                                         entry->local_adr = 1;
1985                         }
1986                 }
1987         }
1988         entry->all_const = all_const;
1989
1990         callee_env = get_irg_link(callee);
1991         if (callee_env->n_callers == 1 &&
1992             callee != current_ir_graph &&
1993                 get_entity_visibility(ent) == visibility_local) {
1994                 weight += 700;
1995         }
1996
1997         /* give a bonus for functions with one block */
1998         if (callee_env->n_blocks == 1)
1999                 weight = weight * 3 / 2;
2000
2001         /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
2002         if (callee_env->n_nodes < 30 && !callee_env->recursive)
2003                 weight += 2000;
2004
2005         /* and finally for leaves: they do not increase the register pressure
2006            because of callee safe registers */
2007         if (callee_env->n_call_nodes == 0)
2008                 weight += 400;
2009
2010         /** it's important to inline inner loops first */
2011         if (entry->loop_depth > 30)
2012                 weight += 30 * 1024;
2013         else
2014                 weight += entry->loop_depth * 1024;
2015
2016         /*
2017          * All arguments constant is probably a good sign, give an extra bonus
2018          */
2019         if (all_const)
2020                 weight += 1024;
2021
2022         return entry->benefice = weight;
2023 }
2024
2025 static ir_graph **irgs;
2026 static int      last_irg;
2027
2028 /**
2029  * Callgraph walker, collect all visited graphs.
2030  */
2031 static void callgraph_walker(ir_graph *irg, void *data) {
2032         (void) data;
2033         irgs[last_irg++] = irg;
2034 }
2035
2036 /**
2037  * Creates an inline order for all graphs.
2038  *
2039  * @return the list of graphs.
2040  */
2041 static ir_graph **create_irg_list(void) {
2042         ir_entity **free_methods;
2043         int       arr_len;
2044         int       n_irgs = get_irp_n_irgs();
2045
2046         cgana(&arr_len, &free_methods);
2047         xfree(free_methods);
2048
2049         compute_callgraph();
2050
2051         last_irg = 0;
2052         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2053
2054         callgraph_walk(NULL, callgraph_walker, NULL);
2055         assert(n_irgs == last_irg);
2056
2057         return irgs;
2058 }
2059
2060 /**
2061  * Push a call onto the priority list if its benefice is big enough.
2062  *
2063  * @param pqueue   the priority queue of calls
2064  * @param call     the call entry
2065  * @param inlien_threshold
2066  *                 the threshold value
2067  */
2068 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2069                             int inline_threshold)
2070 {
2071         ir_graph            *callee  = call->callee;
2072         irg_inline_property prop     = get_irg_inline_property(callee);
2073         int                 benefice = calc_inline_benefice(call, callee);
2074
2075         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2076             get_irn_irg(call->call), call->call, callee, benefice));
2077
2078         if (prop < irg_inline_forced && benefice < inline_threshold) {
2079                 return;
2080         }
2081
2082         pqueue_put(pqueue, call, benefice);
2083 }
2084
2085 /**
2086  * Try to inline calls into a graph.
2087  *
2088  * @param irg      the graph into which we inline
2089  * @param maxsize  do NOT inline if the size of irg gets
2090  *                 bigger than this amount
2091  * @param inline_threshold
2092  *                 threshold value for inline decision
2093  * @param copied_graphs
2094  *                 map containing copied of recursive graphs
2095  */
2096 static void inline_into(ir_graph *irg, unsigned maxsize,
2097                         int inline_threshold, pmap *copied_graphs)
2098 {
2099         int            phiproj_computed = 0;
2100         inline_irg_env *env = get_irg_link(irg);
2101         call_entry     *curr_call;
2102         wenv_t         wenv;
2103         pqueue_t       *pqueue;
2104
2105         if (env->n_call_nodes == 0)
2106                 return;
2107
2108         if (env->n_nodes > maxsize) {
2109                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2110                 return;
2111         }
2112
2113         current_ir_graph = irg;
2114         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2115
2116         /* put irgs into the pqueue */
2117         pqueue = new_pqueue();
2118
2119         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2120                 assert(is_Call(curr_call->call));
2121                 maybe_push_call(pqueue, curr_call, inline_threshold);
2122         }
2123
2124         /* note that the list of possible calls is updated during the process */
2125         while (!pqueue_empty(pqueue)) {
2126                 int                 did_inline;
2127                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2128                 ir_graph            *callee     = curr_call->callee;
2129                 ir_node             *call_node  = curr_call->call;
2130                 inline_irg_env      *callee_env = get_irg_link(callee);
2131                 irg_inline_property prop        = get_irg_inline_property(callee);
2132                 int                 loop_depth;
2133                 const call_entry    *centry;
2134                 pmap_entry          *e;
2135
2136                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2137                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2138                                                 env->n_nodes, callee, callee_env->n_nodes));
2139                         continue;
2140                 }
2141
2142                 e = pmap_find(copied_graphs, callee);
2143                 if (e != NULL) {
2144                         int benefice = curr_call->benefice;
2145                         /*
2146                          * Reduce the weight for recursive function IFF not all arguments are const.
2147                          * inlining recursive functions is rarely good.
2148                          */
2149                         if (!curr_call->all_const)
2150                                 benefice -= 2000;
2151                         if (benefice < inline_threshold)
2152                                 continue;
2153
2154                         /*
2155                          * Remap callee if we have a copy.
2156                          */
2157                         callee     = e->value;
2158                         callee_env = get_irg_link(callee);
2159                 }
2160
2161                 if (current_ir_graph == callee) {
2162                         /*
2163                          * Recursive call: we cannot directly inline because we cannot
2164                          * walk the graph and change it. So we have to make a copy of
2165                          * the graph first.
2166                          */
2167                         int benefice = curr_call->benefice;
2168                         ir_graph *copy;
2169
2170                         /*
2171                          * Reduce the weight for recursive function IFF not all arguments are const.
2172                          * inlining recursive functions is rarely good.
2173                          */
2174                         if (!curr_call->all_const)
2175                                 benefice -= 2000;
2176                         if (benefice < inline_threshold)
2177                                 continue;
2178
2179                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2180
2181                         /*
2182                          * No copy yet, create one.
2183                          * Note that recursive methods are never leaves, so it is
2184                          * sufficient to test this condition here.
2185                          */
2186                         copy = create_irg_copy(callee);
2187
2188                         /* create_irg_copy() destroys the Proj links, recompute them */
2189                         phiproj_computed = 0;
2190
2191                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2192
2193                         /* allocate a new environment */
2194                         callee_env = alloc_inline_irg_env();
2195                         set_irg_link(copy, callee_env);
2196
2197                         assure_cf_loop(copy);
2198                         wenv.x              = callee_env;
2199                         wenv.ignore_callers = 1;
2200                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2201
2202                         /*
2203                          * Enter the entity of the original graph. This is needed
2204                          * for inline_method(). However, note that ent->irg still points
2205                          * to callee, NOT to copy.
2206                          */
2207                         set_irg_entity(copy, get_irg_entity(callee));
2208
2209                         pmap_insert(copied_graphs, callee, copy);
2210                         callee = copy;
2211
2212                         /* we have only one caller: the original graph */
2213                         callee_env->n_callers      = 1;
2214                         callee_env->n_callers_orig = 1;
2215                 }
2216                 if (! phiproj_computed) {
2217                         phiproj_computed = 1;
2218                         collect_phiprojs(current_ir_graph);
2219                 }
2220                 did_inline = inline_method(call_node, callee);
2221                 if (!did_inline)
2222                         continue;
2223
2224                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2225                 phiproj_computed = 0;
2226
2227                 /* remove it from the caller list */
2228                 list_del(&curr_call->list);
2229
2230                 /* callee was inline. Append it's call list. */
2231                 env->got_inline = 1;
2232                 if (curr_call->local_adr)
2233                         env->local_vars = 1;
2234                 --env->n_call_nodes;
2235
2236                 /* we just generate a bunch of new calls */
2237                 loop_depth = curr_call->loop_depth;
2238                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2239                         inline_irg_env *penv = get_irg_link(centry->callee);
2240                         ir_node        *new_call;
2241                         call_entry     *new_entry;
2242
2243                         /* after we have inlined callee, all called methods inside
2244                          * callee are now called once more */
2245                         ++penv->n_callers;
2246
2247                         /* Note that the src list points to Call nodes in the inlined graph,
2248                          * but we need Call nodes in our graph. Luckily the inliner leaves
2249                          * this information in the link field. */
2250                         new_call = get_irn_link(centry->call);
2251                         assert(is_Call(new_call));
2252
2253                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2254                         list_add_tail(&new_entry->list, &env->calls);
2255                         maybe_push_call(pqueue, new_entry, inline_threshold);
2256                 }
2257
2258                 env->n_call_nodes += callee_env->n_call_nodes;
2259                 env->n_nodes += callee_env->n_nodes;
2260                 --callee_env->n_callers;
2261         }
2262         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2263         del_pqueue(pqueue);
2264 }
2265
2266 /*
2267  * Heuristic inliner. Calculates a benefice value for every call and inlines
2268  * those calls with a value higher than the threshold.
2269  */
2270 void inline_functions(unsigned maxsize, int inline_threshold) {
2271         inline_irg_env   *env;
2272         int              i, n_irgs;
2273         ir_graph         *rem;
2274         wenv_t           wenv;
2275         pmap             *copied_graphs;
2276         pmap_entry       *pm_entry;
2277         ir_graph         **irgs;
2278
2279         rem = current_ir_graph;
2280         obstack_init(&temp_obst);
2281
2282         irgs = create_irg_list();
2283
2284         /* a map for the copied graphs, used to inline recursive calls */
2285         copied_graphs = pmap_create();
2286
2287         /* extend all irgs by a temporary data structure for inlining. */
2288         n_irgs = get_irp_n_irgs();
2289         for (i = 0; i < n_irgs; ++i)
2290                 set_irg_link(irgs[i], alloc_inline_irg_env());
2291
2292         /* Pre-compute information in temporary data structure. */
2293         wenv.ignore_runtime = 0;
2294         wenv.ignore_callers = 0;
2295         for (i = 0; i < n_irgs; ++i) {
2296                 ir_graph *irg = irgs[i];
2297
2298                 free_callee_info(irg);
2299
2300                 wenv.x = get_irg_link(irg);
2301                 assure_cf_loop(irg);
2302                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2303         }
2304
2305         /* -- and now inline. -- */
2306         for (i = 0; i < n_irgs; ++i) {
2307                 ir_graph *irg = irgs[i];
2308
2309                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2310         }
2311
2312         for (i = 0; i < n_irgs; ++i) {
2313                 ir_graph *irg = irgs[i];
2314
2315                 env = get_irg_link(irg);
2316                 if (env->got_inline) {
2317                         /* this irg got calls inlined: optimize it */
2318                         if (get_opt_combo()) {
2319                                 if (env->local_vars) {
2320                                         scalar_replacement_opt(irg);
2321                                 }
2322                                 combo(irg);
2323                         } else {
2324                                 if (env->local_vars) {
2325                                         if (scalar_replacement_opt(irg)) {
2326                                                 optimize_graph_df(irg);
2327                                         }
2328                                 }
2329                                 optimize_cf(irg);
2330                         }
2331                 }
2332                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2333                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2334                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2335                         env->n_callers_orig, env->n_callers,
2336                         get_entity_name(get_irg_entity(irg))));
2337                 }
2338         }
2339
2340         /* kill the copied graphs: we don't need them anymore */
2341         foreach_pmap(copied_graphs, pm_entry) {
2342                 ir_graph *copy = pm_entry->value;
2343
2344                 /* reset the entity, otherwise it will be deleted in the next step ... */
2345                 set_irg_entity(copy, NULL);
2346                 free_ir_graph(copy);
2347         }
2348         pmap_destroy(copied_graphs);
2349
2350         xfree(irgs);
2351
2352         obstack_free(&temp_obst, NULL);
2353         current_ir_graph = rem;
2354 }
2355
2356 void firm_init_inline(void) {
2357         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2358 }