nsz Git - libfirm/blob - ir/opt/opt_inline.c

   1 /*
   2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief    Dead node elimination and Procedure Inlining.
  23  * @author   Michael Beck, Goetz Lindenmaier
  24  * @version  $Id$
  25  */
  26 #ifdef HAVE_CONFIG_H
  27 # include "config.h"
  28 #endif
  29
  30 #include <limits.h>
  31 #include <assert.h>
  32
  33 #include "irnode_t.h"
  34 #include "irgraph_t.h"
  35 #include "irprog_t.h"
  36
  37 #include "iroptimize.h"
  38 #include "ircons_t.h"
  39 #include "iropt_t.h"
  40 #include "irgopt.h"
  41 #include "irgmod.h"
  42 #include "irgwalk.h"
  43
  44 #include "adt/array.h"
  45 #include "adt/pset.h"
  46 #include "adt/pmap.h"
  47 #include "adt/pdeq.h"
  48 #include "adt/xmalloc.h"
  49 #include "adt/pqueue.h"
  50
  51 #include "irouts.h"
  52 #include "irloop_t.h"
  53 #include "irbackedge_t.h"
  54 #include "opt_inline_t.h"
  55 #include "cgana.h"
  56 #include "trouts.h"
  57 #include "error.h"
  58
  59 #include "analyze_irg_args.h"
  60 #include "iredges_t.h"
  61 #include "irflag_t.h"
  62 #include "irhooks.h"
  63 #include "irtools.h"
  64 #include "iropt_dbg.h"
  65
  66 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  67
  68 /*------------------------------------------------------------------*/
  69 /* Routines for dead node elimination / copying garbage collection  */
  70 /* of the obstack.                                                  */
  71 /*------------------------------------------------------------------*/
  72
  73 /**
  74  * Remember the new node in the old node by using a field all nodes have.
  75  */
  76 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
  77
  78 /**
  79  * Get this new node, before the old node is forgotten.
  80  */
  81 #define get_new_node(oldn) get_irn_link(oldn)
  82
  83 /**
  84  * Check if a new node was set.
  85  */
  86 #define has_new_node(n) (get_new_node(n) != NULL)
  87
  88 /**
  89  * We use the block_visited flag to mark that we have computed the
  90  * number of useful predecessors for this block.
  91  * Further we encode the new arity in this flag in the old blocks.
  92  * Remembering the arity is useful, as it saves a lot of pointer
  93  * accesses.  This function is called for all Phi and Block nodes
  94  * in a Block.
  95  */
  96 static INLINE int
  97 compute_new_arity(ir_node *b) {
  98         int i, res, irn_arity;
  99         int irg_v, block_v;
 100
 101         irg_v = get_irg_block_visited(current_ir_graph);
 102         block_v = get_Block_block_visited(b);
 103         if (block_v >= irg_v) {
 104                 /* we computed the number of preds for this block and saved it in the
 105                    block_v flag */
 106                 return block_v - irg_v;
 107         } else {
 108                 /* compute the number of good predecessors */
 109                 res = irn_arity = get_irn_arity(b);
 110                 for (i = 0; i < irn_arity; i++)
 111                         if (is_Bad(get_irn_n(b, i))) res--;
 112                         /* save it in the flag. */
 113                         set_Block_block_visited(b, irg_v + res);
 114                         return res;
 115         }
 116 }
 117
 118 /**
 119  * Copies the node to the new obstack. The Ins of the new node point to
 120  * the predecessors on the old obstack.  For block/phi nodes not all
 121  * predecessors might be copied.  n->link points to the new node.
 122  * For Phi and Block nodes the function allocates in-arrays with an arity
 123  * only for useful predecessors.  The arity is determined by counting
 124  * the non-bad predecessors of the block.
 125  *
 126  * @param n    The node to be copied
 127  * @param env  if non-NULL, the node number attribute will be copied to the new node
 128  *
 129  * Note: Also used for loop unrolling.
 130  */
 131 static void copy_node(ir_node *n, void *env) {
 132         ir_node *nn, *block;
 133         int new_arity;
 134         ir_op *op = get_irn_op(n);
 135         (void) env;
 136
 137         if (op == op_Bad) {
 138                 /* node copied already */
 139                 return;
 140         } else if (op == op_Block) {
 141                 block = NULL;
 142                 new_arity = compute_new_arity(n);
 143                 n->attr.block.graph_arr = NULL;
 144         } else {
 145                 block = get_nodes_block(n);
 146                 if (op == op_Phi) {
 147                         new_arity = compute_new_arity(block);
 148                 } else {
 149                         new_arity = get_irn_arity(n);
 150                 }
 151         }
 152         nn = new_ir_node(get_irn_dbg_info(n),
 153                 current_ir_graph,
 154                 block,
 155                 op,
 156                 get_irn_mode(n),
 157                 new_arity,
 158                 get_irn_in(n) + 1);
 159         /* Copy the attributes.  These might point to additional data.  If this
 160            was allocated on the old obstack the pointers now are dangling.  This
 161            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
 162         if (op == op_Block) {
 163                 /* we cannot allow blocks WITHOUT macroblock input */
 164                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
 165         }
 166         copy_node_attr(n, nn);
 167
 168 #ifdef DEBUG_libfirm
 169         {
 170                 int copy_node_nr = env != NULL;
 171                 if (copy_node_nr) {
 172                         /* for easier debugging, we want to copy the node numbers too */
 173                         nn->node_nr = n->node_nr;
 174                 }
 175         }
 176 #endif
 177
 178         set_new_node(n, nn);
 179         hook_dead_node_elim_subst(current_ir_graph, n, nn);
 180 }
 181
 182 /**
 183  * Copies new predecessors of old node to new node remembered in link.
 184  * Spare the Bad predecessors of Phi and Block nodes.
 185  */
 186 static void copy_preds(ir_node *n, void *env) {
 187         ir_node *nn, *block;
 188         int i, j, irn_arity;
 189         (void) env;
 190
 191         nn = get_new_node(n);
 192
 193         if (is_Block(n)) {
 194                 /* copy the macro block header */
 195                 ir_node *mbh = get_Block_MacroBlock(n);
 196
 197                 if (mbh == n) {
 198                         /* this block is a macroblock header */
 199                         set_Block_MacroBlock(nn, nn);
 200                 } else {
 201                         /* get the macro block header */
 202                         ir_node *nmbh = get_new_node(mbh);
 203                         assert(nmbh != NULL);
 204                         set_Block_MacroBlock(nn, nmbh);
 205                 }
 206
 207                 /* Don't copy Bad nodes. */
 208                 j = 0;
 209                 irn_arity = get_irn_arity(n);
 210                 for (i = 0; i < irn_arity; i++) {
 211                         if (! is_Bad(get_irn_n(n, i))) {
 212                                 ir_node *pred = get_irn_n(n, i);
 213                                 set_irn_n(nn, j, get_new_node(pred));
 214                                 j++;
 215                         }
 216                 }
 217                 /* repair the block visited flag from above misuse. Repair it in both
 218                    graphs so that the old one can still be used. */
 219                 set_Block_block_visited(nn, 0);
 220                 set_Block_block_visited(n, 0);
 221                 /* Local optimization could not merge two subsequent blocks if
 222                    in array contained Bads.  Now it's possible.
 223                    We don't call optimize_in_place as it requires
 224                    that the fields in ir_graph are set properly. */
 225                 if ((get_opt_control_flow_straightening()) &&
 226                         (get_Block_n_cfgpreds(nn) == 1) &&
 227                         is_Jmp(get_Block_cfgpred(nn, 0))) {
 228                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
 229                         if (nn == old) {
 230                                 /* Jmp jumps into the block it is in -- deal self cycle. */
 231                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
 232                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
 233                         } else {
 234                                 exchange(nn, old);
 235                         }
 236                 }
 237         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
 238                 /* Don't copy node if corresponding predecessor in block is Bad.
 239                    The Block itself should not be Bad. */
 240                 block = get_nodes_block(n);
 241                 set_nodes_block(nn, get_new_node(block));
 242                 j = 0;
 243                 irn_arity = get_irn_arity(n);
 244                 for (i = 0; i < irn_arity; i++) {
 245                         if (! is_Bad(get_irn_n(block, i))) {
 246                                 ir_node *pred = get_irn_n(n, i);
 247                                 set_irn_n(nn, j, get_new_node(pred));
 248                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
 249                                 j++;
 250                         }
 251                 }
 252                 /* If the pre walker reached this Phi after the post walker visited the
 253                    block block_visited is > 0. */
 254                 set_Block_block_visited(get_nodes_block(n), 0);
 255                 /* Compacting the Phi's ins might generate Phis with only one
 256                    predecessor. */
 257                 if (get_irn_arity(nn) == 1)
 258                         exchange(nn, get_irn_n(nn, 0));
 259         } else {
 260                 irn_arity = get_irn_arity(n);
 261                 for (i = -1; i < irn_arity; i++)
 262                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
 263         }
 264         /* Now the new node is complete.  We can add it to the hash table for CSE.
 265            @@@ inlining aborts if we identify End. Why? */
 266         if (!is_End(nn))
 267                 add_identities(current_ir_graph->value_table, nn);
 268 }
 269
 270 /**
 271  * Copies the graph recursively, compacts the keep-alives of the end node.
 272  *
 273  * @param irg           the graph to be copied
 274  * @param copy_node_nr  If non-zero, the node number will be copied
 275  */
 276 static void copy_graph(ir_graph *irg, int copy_node_nr) {
 277         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
 278         ir_node *ka;      /* keep alive */
 279         int i, irn_arity;
 280         unsigned long vfl;
 281
 282         /* Some nodes must be copied by hand, sigh */
 283         vfl = get_irg_visited(irg);
 284         set_irg_visited(irg, vfl + 1);
 285
 286         oe = get_irg_end(irg);
 287         mark_irn_visited(oe);
 288         /* copy the end node by hand, allocate dynamic in array! */
 289         ne = new_ir_node(get_irn_dbg_info(oe),
 290                 irg,
 291                 NULL,
 292                 op_End,
 293                 mode_X,
 294                 -1,
 295                 NULL);
 296         /* Copy the attributes.  Well, there might be some in the future... */
 297         copy_node_attr(oe, ne);
 298         set_new_node(oe, ne);
 299
 300         /* copy the Bad node */
 301         ob = get_irg_bad(irg);
 302         mark_irn_visited(ob);
 303         nb = new_ir_node(get_irn_dbg_info(ob),
 304                 irg,
 305                 NULL,
 306                 op_Bad,
 307                 mode_T,
 308                 0,
 309                 NULL);
 310         copy_node_attr(ob, nb);
 311         set_new_node(ob, nb);
 312
 313         /* copy the NoMem node */
 314         om = get_irg_no_mem(irg);
 315         mark_irn_visited(om);
 316         nm = new_ir_node(get_irn_dbg_info(om),
 317                 irg,
 318                 NULL,
 319                 op_NoMem,
 320                 mode_M,
 321                 0,
 322                 NULL);
 323         copy_node_attr(om, nm);
 324         set_new_node(om, nm);
 325
 326         /* copy the live nodes */
 327         set_irg_visited(irg, vfl);
 328         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
 329
 330         /* Note: from yet, the visited flag of the graph is equal to vfl + 1 */
 331
 332         /* visit the anchors as well */
 333         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
 334                 ir_node *n = get_irg_anchor(irg, i);
 335
 336                 if (n && (get_irn_visited(n) <= vfl)) {
 337                         set_irg_visited(irg, vfl);
 338                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
 339                 }
 340         }
 341
 342         /* copy_preds for the end node ... */
 343         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
 344
 345         /*- ... and now the keep alives. -*/
 346         /* First pick the not marked block nodes and walk them.  We must pick these
 347            first as else we will oversee blocks reachable from Phis. */
 348         irn_arity = get_End_n_keepalives(oe);
 349         for (i = 0; i < irn_arity; i++) {
 350                 ka = get_End_keepalive(oe, i);
 351                 if (is_Block(ka)) {
 352                         if (get_irn_visited(ka) <= vfl) {
 353                                 /* We must keep the block alive and copy everything reachable */
 354                                 set_irg_visited(irg, vfl);
 355                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
 356                         }
 357                         add_End_keepalive(ne, get_new_node(ka));
 358                 }
 359         }
 360
 361         /* Now pick other nodes.  Here we will keep all! */
 362         irn_arity = get_End_n_keepalives(oe);
 363         for (i = 0; i < irn_arity; i++) {
 364                 ka = get_End_keepalive(oe, i);
 365                 if (!is_Block(ka)) {
 366                         if (get_irn_visited(ka) <= vfl) {
 367                                 /* We didn't copy the node yet.  */
 368                                 set_irg_visited(irg, vfl);
 369                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
 370                         }
 371                         add_End_keepalive(ne, get_new_node(ka));
 372                 }
 373         }
 374
 375         /* start block sometimes only reached after keep alives */
 376         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
 377         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
 378 }
 379
 380 /**
 381  * Copies the graph reachable from current_ir_graph->end to the obstack
 382  * in current_ir_graph and fixes the environment.
 383  * Then fixes the fields in current_ir_graph containing nodes of the
 384  * graph.
 385  *
 386  * @param copy_node_nr  If non-zero, the node number will be copied
 387  */
 388 static void
 389 copy_graph_env(int copy_node_nr) {
 390         ir_graph *irg = current_ir_graph;
 391         ir_node *old_end, *new_anchor;
 392         int i;
 393
 394         /* remove end_except and end_reg nodes */
 395         old_end = get_irg_end(irg);
 396         set_irg_end_except (irg, old_end);
 397         set_irg_end_reg    (irg, old_end);
 398
 399         /* Not all nodes remembered in irg might be reachable
 400            from the end node.  Assure their link is set to NULL, so that
 401            we can test whether new nodes have been computed. */
 402         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
 403                 ir_node *n = get_irg_anchor(irg, i);
 404                 if (n != NULL)
 405                         set_new_node(n, NULL);
 406         }
 407         /* we use the block walk flag for removing Bads from Blocks ins. */
 408         inc_irg_block_visited(irg);
 409
 410         /* copy the graph */
 411         copy_graph(irg, copy_node_nr);
 412
 413         /* fix the anchor */
 414         old_end    = get_irg_end(irg);
 415         new_anchor = new_Anchor(irg);
 416
 417         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
 418                 ir_node *n = get_irg_anchor(irg, i);
 419                 if (n)
 420                         set_irn_n(new_anchor, i, get_new_node(n));
 421         }
 422         free_End(old_end);
 423         irg->anchor = new_anchor;
 424
 425         /* ensure the new anchor is placed in the endblock */
 426         set_nodes_block(new_anchor, get_irg_end_block(irg));
 427 }
 428
 429 /**
 430  * Copies all reachable nodes to a new obstack.  Removes bad inputs
 431  * from block nodes and the corresponding inputs from Phi nodes.
 432  * Merges single exit blocks with single entry blocks and removes
 433  * 1-input Phis.
 434  * Adds all new nodes to a new hash table for CSE.  Does not
 435  * perform CSE, so the hash table might contain common subexpressions.
 436  */
 437 void dead_node_elimination(ir_graph *irg) {
 438         ir_graph *rem;
 439 #ifdef INTERPROCEDURAL_VIEW
 440         int rem_ipview = get_interprocedural_view();
 441 #endif
 442         struct obstack *graveyard_obst = NULL;
 443         struct obstack *rebirth_obst   = NULL;
 444
 445         edges_deactivate(irg);
 446
 447         /* inform statistics that we started a dead-node elimination run */
 448         hook_dead_node_elim(irg, 1);
 449
 450         /* Remember external state of current_ir_graph. */
 451         rem = current_ir_graph;
 452         current_ir_graph = irg;
 453 #ifdef INTERPROCEDURAL_VIEW
 454         set_interprocedural_view(0);
 455 #endif
 456
 457         assert(get_irg_phase_state(irg) != phase_building);
 458
 459         /* Handle graph state */
 460         free_callee_info(irg);
 461         free_irg_outs(irg);
 462         free_trouts();
 463
 464         /* @@@ so far we loose loops when copying */
 465         free_loop_information(irg);
 466
 467         set_irg_doms_inconsistent(irg);
 468
 469         /* A quiet place, where the old obstack can rest in peace,
 470            until it will be cremated. */
 471         graveyard_obst = irg->obst;
 472
 473         /* A new obstack, where the reachable nodes will be copied to. */
 474         rebirth_obst = xmalloc(sizeof(*rebirth_obst));
 475         irg->obst = rebirth_obst;
 476         obstack_init(irg->obst);
 477         irg->last_node_idx = 0;
 478
 479         /* We also need a new value table for CSE */
 480         del_identities(irg->value_table);
 481         irg->value_table = new_identities();
 482
 483         /* Copy the graph from the old to the new obstack */
 484         copy_graph_env(/*copy_node_nr=*/1);
 485
 486         /* Free memory from old unoptimized obstack */
 487         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
 488         xfree(graveyard_obst);            /* ... then free it.           */
 489
 490         /* inform statistics that the run is over */
 491         hook_dead_node_elim(irg, 0);
 492
 493         current_ir_graph = rem;
 494 #ifdef INTERPROCEDURAL_VIEW
 495         set_interprocedural_view(rem_ipview);
 496 #endif
 497 }
 498
 499 /**
 500  * Relink bad predecessors of a block and store the old in array to the
 501  * link field. This function is called by relink_bad_predecessors().
 502  * The array of link field starts with the block operand at position 0.
 503  * If block has bad predecessors, create a new in array without bad preds.
 504  * Otherwise let in array untouched.
 505  */
 506 static void relink_bad_block_predecessors(ir_node *n, void *env) {
 507         ir_node **new_in, *irn;
 508         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
 509         (void) env;
 510
 511         /* if link field of block is NULL, look for bad predecessors otherwise
 512            this is already done */
 513         if (is_Block(n) && get_irn_link(n) == NULL) {
 514                 /* save old predecessors in link field (position 0 is the block operand)*/
 515                 set_irn_link(n, get_irn_in(n));
 516
 517                 /* count predecessors without bad nodes */
 518                 old_irn_arity = get_irn_arity(n);
 519                 for (i = 0; i < old_irn_arity; i++)
 520                         if (!is_Bad(get_irn_n(n, i)))
 521                                 ++new_irn_arity;
 522
 523                 /* arity changing: set new predecessors without bad nodes */
 524                 if (new_irn_arity < old_irn_arity) {
 525                         /* Get new predecessor array. We do not resize the array, as we must
 526                            keep the old one to update Phis. */
 527                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
 528
 529                         /* set new predecessors in array */
 530                         new_in[0] = NULL;
 531                         new_irn_n = 1;
 532                         for (i = 0; i < old_irn_arity; i++) {
 533                                 irn = get_irn_n(n, i);
 534                                 if (!is_Bad(irn)) {
 535                                         new_in[new_irn_n] = irn;
 536                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
 537                                         ++new_irn_n;
 538                                 }
 539                         }
 540                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
 541                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
 542                         n->in = new_in;
 543                 } /* ir node has bad predecessors */
 544         } /* Block is not relinked */
 545 }
 546
 547 /**
 548  * Relinks Bad predecessors from Blocks and Phis called by walker
 549  * remove_bad_predecesors(). If n is a Block, call
 550  * relink_bad_block_redecessors(). If n is a Phi-node, call also the relinking
 551  * function of Phi's Block. If this block has bad predecessors, relink preds
 552  * of the Phi-node.
 553  */
 554 static void relink_bad_predecessors(ir_node *n, void *env) {
 555         ir_node *block, **old_in;
 556         int i, old_irn_arity, new_irn_arity;
 557
 558         /* relink bad predecessors of a block */
 559         if (is_Block(n))
 560                 relink_bad_block_predecessors(n, env);
 561
 562         /* If Phi node relink its block and its predecessors */
 563         if (is_Phi(n)) {
 564                 /* Relink predecessors of phi's block */
 565                 block = get_nodes_block(n);
 566                 if (get_irn_link(block) == NULL)
 567                         relink_bad_block_predecessors(block, env);
 568
 569                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
 570                 old_irn_arity = ARR_LEN(old_in);
 571
 572                 /* Relink Phi predecessors if count of predecessors changed */
 573                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
 574                         /* set new predecessors in array
 575                            n->in[0] remains the same block */
 576                         new_irn_arity = 1;
 577                         for(i = 1; i < old_irn_arity; i++)
 578                                 if (!is_Bad(old_in[i])) {
 579                                         n->in[new_irn_arity] = n->in[i];
 580                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
 581                                         ++new_irn_arity;
 582                                 }
 583
 584                                 ARR_SETLEN(ir_node *, n->in, new_irn_arity);
 585                                 ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
 586                 }
 587         } /* n is a Phi node */
 588 }
 589
 590 /*
 591  * Removes Bad Bad predecessors from Blocks and the corresponding
 592  * inputs to Phi nodes as in dead_node_elimination but without
 593  * copying the graph.
 594  * On walking up set the link field to NULL, on walking down call
 595  * relink_bad_predecessors() (This function stores the old in array
 596  * to the link field and sets a new in array if arity of predecessors
 597  * changes).
 598  */
 599 void remove_bad_predecessors(ir_graph *irg) {
 600         panic("Fix backedge handling first");
 601         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
 602 }
 603
 604
 605 /*
 606    __                      _  __ __
 607   (_     __    o     _    | \/  |_
 608   __)|_| | \_/ | \_/(/_   |_/\__|__
 609
 610   The following stuff implements a facility that automatically patches
 611   registered ir_node pointers to the new node when a dead node elimination occurs.
 612 */
 613
 614 struct _survive_dce_t {
 615         struct obstack obst;
 616         pmap *places;
 617         pmap *new_places;
 618         hook_entry_t dead_node_elim;
 619         hook_entry_t dead_node_elim_subst;
 620 };
 621
 622 typedef struct _survive_dce_list_t {
 623         struct _survive_dce_list_t *next;
 624         ir_node **place;
 625 } survive_dce_list_t;
 626
 627 static void dead_node_hook(void *context, ir_graph *irg, int start) {
 628         survive_dce_t *sd = context;
 629         (void) irg;
 630
 631         /* Create a new map before the dead node elimination is performed. */
 632         if (start) {
 633                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
 634         } else {
 635                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
 636                 pmap_destroy(sd->places);
 637                 sd->places     = sd->new_places;
 638                 sd->new_places = NULL;
 639         }
 640 }
 641
 642 /**
 643  * Hook called when dead node elimination replaces old by nw.
 644  */
 645 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
 646         survive_dce_t *sd = context;
 647         survive_dce_list_t *list = pmap_get(sd->places, old);
 648         (void) irg;
 649
 650         /* If the node is to be patched back, write the new address to all registered locations. */
 651         if (list) {
 652                 survive_dce_list_t *p;
 653
 654                 for (p = list; p; p = p->next)
 655                         *(p->place) = nw;
 656
 657                 pmap_insert(sd->new_places, nw, list);
 658         }
 659 }
 660
 661 /**
 662  * Make a new Survive DCE environment.
 663  */
 664 survive_dce_t *new_survive_dce(void) {
 665         survive_dce_t *res = xmalloc(sizeof(res[0]));
 666         obstack_init(&res->obst);
 667         res->places     = pmap_create();
 668         res->new_places = NULL;
 669
 670         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
 671         res->dead_node_elim.context                   = res;
 672         res->dead_node_elim.next                      = NULL;
 673
 674         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
 675         res->dead_node_elim_subst.context = res;
 676         res->dead_node_elim_subst.next    = NULL;
 677
 678 #ifndef FIRM_ENABLE_HOOKS
 679         assert(0 && "need hooks enabled");
 680 #endif
 681
 682         register_hook(hook_dead_node_elim, &res->dead_node_elim);
 683         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
 684         return res;
 685 }
 686
 687 /**
 688  * Free a Survive DCE environment.
 689  */
 690 void free_survive_dce(survive_dce_t *sd) {
 691         obstack_free(&sd->obst, NULL);
 692         pmap_destroy(sd->places);
 693         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
 694         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
 695         xfree(sd);
 696 }
 697
 698 /**
 699  * Register a node pointer to be patched upon DCE.
 700  * When DCE occurs, the node pointer specified by @p place will be
 701  * patched to the new address of the node it is pointing to.
 702  *
 703  * @param sd    The Survive DCE environment.
 704  * @param place The address of the node pointer.
 705  */
 706 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
 707         if (*place != NULL) {
 708                 ir_node *irn      = *place;
 709                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
 710                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
 711
 712                 nw->next  = curr;
 713                 nw->place = place;
 714
 715                 pmap_insert(sd->places, irn, nw);
 716         }
 717 }
 718
 719 /*--------------------------------------------------------------------*/
 720 /*  Functionality for inlining                                         */
 721 /*--------------------------------------------------------------------*/
 722
 723 /**
 724  * Copy node for inlineing.  Updates attributes that change when
 725  * inlineing but not for dead node elimination.
 726  *
 727  * Copies the node by calling copy_node() and then updates the entity if
 728  * it's a local one.  env must be a pointer of the frame type of the
 729  * inlined procedure. The new entities must be in the link field of
 730  * the entities.
 731  */
 732 static void copy_node_inline(ir_node *n, void *env) {
 733         ir_node *nn;
 734         ir_type *frame_tp = (ir_type *)env;
 735
 736         copy_node(n, NULL);
 737         if (is_Sel(n)) {
 738                 nn = get_new_node (n);
 739                 assert(is_Sel(nn));
 740                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
 741                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
 742                 }
 743         } else if (is_Block(n)) {
 744                 nn = get_new_node (n);
 745                 nn->attr.block.irg = current_ir_graph;
 746         }
 747 }
 748
 749 /**
 750  * Copies new predecessors of old node and move constants to
 751  * the Start Block.
 752  */
 753 static void copy_preds_inline(ir_node *n, void *env) {
 754         ir_node *nn;
 755
 756         copy_preds(n, env);
 757         nn = skip_Id(get_new_node(n));
 758         if (is_irn_constlike(nn)) {
 759                 /* move Constants into the start block */
 760                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
 761
 762                 n = identify_remember(current_ir_graph->value_table, nn);
 763                 if (nn != n) {
 764                         DBG_OPT_CSE(nn, n);
 765                         exchange(nn, n);
 766                 }
 767         }
 768 }
 769
 770 /**
 771  * Walker: checks if P_value_arg_base is used.
 772  */
 773 static void find_addr(ir_node *node, void *env) {
 774         int *allow_inline = env;
 775         if (is_Proj(node) &&
 776                         is_Start(get_Proj_pred(node)) &&
 777                         get_Proj_proj(node) == pn_Start_P_value_arg_base) {
 778                 *allow_inline = 0;
 779         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
 780                 /* From GCC:
 781                  * Refuse to inline alloca call unless user explicitly forced so as this
 782                  * may change program's memory overhead drastically when the function
 783                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
 784                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
 785                  *
 786                  * Sorryly this is true with our implementation also.
 787                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
 788                  * disables inlining of functions using VLA (with are completely save).
 789                  *
 790                  * 2 Solutions:
 791                  * - add a flag to the Alloc node for "real" alloca() calls
 792                  * - add a new Stack-Restore node at the end of a function using alloca()
 793                  */
 794                 *allow_inline = 0;
 795         }
 796 }
 797
 798 /**
 799  * Check if we can inline a given call.
 800  * Currently, we cannot inline two cases:
 801  * - call with compound arguments
 802  * - graphs that take the address of a parameter
 803  *
 804  * check these conditions here
 805  */
 806 static int can_inline(ir_node *call, ir_graph *called_graph) {
 807         ir_type *call_type = get_Call_type(call);
 808         int params, ress, i, res;
 809         assert(is_Method_type(call_type));
 810
 811         params = get_method_n_params(call_type);
 812         ress   = get_method_n_ress(call_type);
 813
 814         /* check parameters for compound arguments */
 815         for (i = 0; i < params; ++i) {
 816                 ir_type *p_type = get_method_param_type(call_type, i);
 817
 818                 if (is_compound_type(p_type))
 819                         return 0;
 820         }
 821
 822         /* check results for compound arguments */
 823         for (i = 0; i < ress; ++i) {
 824                 ir_type *r_type = get_method_res_type(call_type, i);
 825
 826                 if (is_compound_type(r_type))
 827                         return 0;
 828         }
 829
 830         res = 1;
 831         irg_walk_graph(called_graph, find_addr, NULL, &res);
 832
 833         return res;
 834 }
 835
 836 enum exc_mode {
 837         exc_handler    = 0, /**< There is a handler. */
 838         exc_to_end     = 1, /**< Branches to End. */
 839         exc_no_handler = 2  /**< Exception handling not represented. */
 840 };
 841
 842 /* Inlines a method at the given call site. */
 843 int inline_method(ir_node *call, ir_graph *called_graph) {
 844         ir_node             *pre_call;
 845         ir_node             *post_call, *post_bl;
 846         ir_node             *in[pn_Start_max];
 847         ir_node             *end, *end_bl, *block;
 848         ir_node             **res_pred;
 849         ir_node             **cf_pred;
 850         ir_node             **args_in;
 851         ir_node             *ret, *phi;
 852         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
 853         enum exc_mode       exc_handling;
 854         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
 855         ir_entity           *ent;
 856         ir_graph            *rem, *irg;
 857         irg_inline_property prop = get_irg_inline_property(called_graph);
 858         unsigned long       visited;
 859
 860         if (prop == irg_inline_forbidden)
 861                 return 0;
 862
 863         ent = get_irg_entity(called_graph);
 864
 865         mtp = get_entity_type(ent);
 866         ctp = get_Call_type(call);
 867         if (get_method_n_params(mtp) > get_method_n_params(ctp)) {
 868                 /* this is a bad feature of C: without a prototype, we can can call a function with less
 869                 parameters than needed. Currently we don't support this, although it would be
 870                 to use Unknown than. */
 871                 return 0;
 872         }
 873
 874         /* Argh, compiling C has some bad consequences:
 875            the call type AND the method type might be different.
 876            It is implementation defendant what happens in that case.
 877            We support inlining, if the bitsize of the types matches AND
 878            the same arithmetic is used. */
 879         n_params = get_method_n_params(mtp);
 880         for (i = n_params - 1; i >= 0; --i) {
 881                 ir_type *param_tp = get_method_param_type(mtp, i);
 882                 ir_type *arg_tp   = get_method_param_type(ctp, i);
 883
 884                 if (param_tp != arg_tp) {
 885                         ir_mode *pmode = get_type_mode(param_tp);
 886                         ir_mode *amode = get_type_mode(arg_tp);
 887
 888                         if (pmode == NULL || amode == NULL)
 889                                 return 0;
 890                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
 891                                 return 0;
 892                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
 893                                 return 0;
 894                         /* otherwise we can simply "reinterpret" the bits */
 895                 }
 896         }
 897
 898         irg = get_irn_irg(call);
 899
 900         /*
 901          * We cannot inline a recursive call. The graph must be copied before
 902          * the call the inline_method() using create_irg_copy().
 903          */
 904         if (called_graph == irg)
 905                 return 0;
 906
 907         /*
 908          * currently, we cannot inline two cases:
 909          * - call with compound arguments
 910          * - graphs that take the address of a parameter
 911          */
 912         if (! can_inline(call, called_graph))
 913                 return 0;
 914
 915         rem = current_ir_graph;
 916         current_ir_graph = irg;
 917
 918         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
 919
 920         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
 921         rem_opt = get_opt_optimize();
 922         set_optimize(0);
 923
 924         /* Handle graph state */
 925         assert(get_irg_phase_state(irg) != phase_building);
 926         assert(get_irg_pinned(irg) == op_pin_state_pinned);
 927         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
 928         set_irg_outs_inconsistent(irg);
 929         set_irg_extblk_inconsistent(irg);
 930         set_irg_doms_inconsistent(irg);
 931         set_irg_loopinfo_inconsistent(irg);
 932         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
 933
 934         /* -- Check preconditions -- */
 935         assert(is_Call(call));
 936
 937         /* here we know we WILL inline, so inform the statistics */
 938         hook_inline(call, called_graph);
 939
 940         /* -- Decide how to handle exception control flow: Is there a handler
 941            for the Call node, or do we branch directly to End on an exception?
 942            exc_handling:
 943            0 There is a handler.
 944            1 Branches to End.
 945            2 Exception handling not represented in Firm. -- */
 946         {
 947                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
 948                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
 949                         long proj_nr = get_Proj_proj(proj);
 950                         if (proj_nr == pn_Call_X_except) Xproj = proj;
 951                         if (proj_nr == pn_Call_M_except) Mproj = proj;
 952                 }
 953                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
 954                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
 955                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
 956         }
 957
 958         /* create the argument tuple */
 959         NEW_ARR_A(ir_type *, args_in, n_params);
 960
 961         block = get_nodes_block(call);
 962         for (i = n_params - 1; i >= 0; --i) {
 963                 ir_node *arg      = get_Call_param(call, i);
 964                 ir_type *param_tp = get_method_param_type(mtp, i);
 965                 ir_mode *mode     = get_type_mode(param_tp);
 966
 967                 if (mode != get_irn_mode(arg)) {
 968                         arg = new_r_Conv(irg, block, arg, mode);
 969                 }
 970                 args_in[i] = arg;
 971         }
 972
 973         /* --
 974            the procedure and later replaces the Start node of the called graph.
 975            Post_call is the old Call node and collects the results of the called
 976            graph. Both will end up being a tuple.  -- */
 977         post_bl = get_nodes_block(call);
 978         set_irg_current_block(irg, post_bl);
 979         /* XxMxPxPxPxT of Start + parameter of Call */
 980         in[pn_Start_X_initial_exec]   = new_Jmp();
 981         in[pn_Start_M]                = get_Call_mem(call);
 982         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
 983         in[pn_Start_P_tls]            = get_irg_tls(irg);
 984         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
 985         /* in[pn_Start_P_value_arg_base] = ??? */
 986         assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
 987         pre_call = new_Tuple(pn_Start_max - 1, in);
 988         post_call = call;
 989
 990         /* --
 991            The new block gets the ins of the old block, pre_call and all its
 992            predecessors and all Phi nodes. -- */
 993         part_block(pre_call);
 994
 995         /* -- Prepare state for dead node elimination -- */
 996         /* Visited flags in calling irg must be >= flag in called irg.
 997            Else walker and arity computation will not work. */
 998         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
 999                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1000         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1001                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1002         visited = get_irg_visited(irg);
1003
1004         /* Set pre_call as new Start node in link field of the start node of
1005            calling graph and pre_calls block as new block for the start block
1006            of calling graph.
1007            Further mark these nodes so that they are not visited by the
1008            copying. */
1009         set_irn_link(get_irg_start(called_graph), pre_call);
1010         set_irn_visited(get_irg_start(called_graph), visited);
1011         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1012         set_irn_visited(get_irg_start_block(called_graph), visited);
1013
1014         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1015         set_irn_visited(get_irg_bad(called_graph), visited);
1016
1017         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1018         set_irn_visited(get_irg_no_mem(called_graph), visited);
1019
1020         /* Initialize for compaction of in arrays */
1021         inc_irg_block_visited(irg);
1022
1023         /* -- Replicate local entities of the called_graph -- */
1024         /* copy the entities. */
1025         called_frame = get_irg_frame_type(called_graph);
1026         curr_frame   = get_irg_frame_type(irg);
1027         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1028                 ir_entity *new_ent, *old_ent;
1029                 old_ent = get_class_member(called_frame, i);
1030                 new_ent = copy_entity_own(old_ent, curr_frame);
1031                 set_entity_link(old_ent, new_ent);
1032         }
1033
1034         /* visited is > than that of called graph.  With this trick visited will
1035            remain unchanged so that an outer walker, e.g., searching the call nodes
1036             to inline, calling this inline will not visit the inlined nodes. */
1037         set_irg_visited(irg, get_irg_visited(irg)-1);
1038
1039         /* -- Performing dead node elimination inlines the graph -- */
1040         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1041            entities. */
1042         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1043                  get_irg_frame_type(called_graph));
1044
1045         /* Repair called_graph */
1046         set_irg_visited(called_graph, get_irg_visited(irg));
1047         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1048         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1049
1050         /* -- Merge the end of the inlined procedure with the call site -- */
1051         /* We will turn the old Call node into a Tuple with the following
1052            predecessors:
1053            -1:  Block of Tuple.
1054            0: Phi of all Memories of Return statements.
1055            1: Jmp from new Block that merges the control flow from all exception
1056            predecessors of the old end block.
1057            2: Tuple of all arguments.
1058            3: Phi of Exception memories.
1059            In case the old Call directly branches to End on an exception we don't
1060            need the block merging all exceptions nor the Phi of the exception
1061            memories.
1062         */
1063
1064         /* -- Precompute some values -- */
1065         end_bl = get_new_node(get_irg_end_block(called_graph));
1066         end = get_new_node(get_irg_end(called_graph));
1067         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1068         n_res = get_method_n_ress(get_Call_type(call));
1069
1070         res_pred = xmalloc(n_res * sizeof(*res_pred));
1071         cf_pred  = xmalloc(arity * sizeof(*res_pred));
1072
1073         set_irg_current_block(irg, post_bl); /* just to make sure */
1074
1075         /* -- archive keepalives -- */
1076         irn_arity = get_irn_arity(end);
1077         for (i = 0; i < irn_arity; i++) {
1078                 ir_node *ka = get_End_keepalive(end, i);
1079                 if (! is_Bad(ka))
1080                         add_End_keepalive(get_irg_end(irg), ka);
1081         }
1082
1083         /* The new end node will die.  We need not free as the in array is on the obstack:
1084            copy_node() only generated 'D' arrays. */
1085
1086         /* -- Replace Return nodes by Jump nodes. -- */
1087         n_ret = 0;
1088         for (i = 0; i < arity; i++) {
1089                 ir_node *ret;
1090                 ret = get_Block_cfgpred(end_bl, i);
1091                 if (is_Return(ret)) {
1092                         cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
1093                         n_ret++;
1094                 }
1095         }
1096         set_irn_in(post_bl, n_ret, cf_pred);
1097
1098         /* -- Build a Tuple for all results of the method.
1099            Add Phi node if there was more than one Return.  -- */
1100         turn_into_tuple(post_call, pn_Call_max);
1101         /* First the Memory-Phi */
1102         n_ret = 0;
1103         for (i = 0; i < arity; i++) {
1104                 ret = get_Block_cfgpred(end_bl, i);
1105                 if (is_Return(ret)) {
1106                         cf_pred[n_ret] = get_Return_mem(ret);
1107                         n_ret++;
1108                 }
1109         }
1110         phi = new_Phi(n_ret, cf_pred, mode_M);
1111         set_Tuple_pred(call, pn_Call_M_regular, phi);
1112         /* Conserve Phi-list for further inlinings -- but might be optimized */
1113         if (get_nodes_block(phi) == post_bl) {
1114                 set_irn_link(phi, get_irn_link(post_bl));
1115                 set_irn_link(post_bl, phi);
1116         }
1117         /* Now the real results */
1118         if (n_res > 0) {
1119                 for (j = 0; j < n_res; j++) {
1120                         n_ret = 0;
1121                         for (i = 0; i < arity; i++) {
1122                                 ret = get_Block_cfgpred(end_bl, i);
1123                                 if (is_Return(ret)) {
1124                                         cf_pred[n_ret] = get_Return_res(ret, j);
1125                                         n_ret++;
1126                                 }
1127                         }
1128                         if (n_ret > 0)
1129                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1130                         else
1131                                 phi = new_Bad();
1132                         res_pred[j] = phi;
1133                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1134                         if (get_nodes_block(phi) == post_bl) {
1135                                 set_Phi_next(phi, get_Block_phis(post_bl));
1136                                 set_Block_phis(post_bl, phi);
1137                         }
1138                 }
1139                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1140         } else {
1141                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1142         }
1143         /* handle the regular call */
1144         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1145
1146         /* For now, we cannot inline calls with value_base */
1147         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1148
1149         /* Finally the exception control flow.
1150            We have two (three) possible situations:
1151            First if the Call branches to an exception handler: We need to add a Phi node to
1152            collect the memory containing the exception objects.  Further we need
1153            to add another block to get a correct representation of this Phi.  To
1154            this block we add a Jmp that resolves into the X output of the Call
1155            when the Call is turned into a tuple.
1156            Second the Call branches to End, the exception is not handled.  Just
1157            add all inlined exception branches to the End node.
1158            Third: there is no Exception edge at all. Handle as case two. */
1159         if (exc_handling == exc_handler) {
1160                 n_exc = 0;
1161                 for (i = 0; i < arity; i++) {
1162                         ir_node *ret, *irn;
1163                         ret = get_Block_cfgpred(end_bl, i);
1164                         irn = skip_Proj(ret);
1165                         if (is_fragile_op(irn) || is_Raise(irn)) {
1166                                 cf_pred[n_exc] = ret;
1167                                 ++n_exc;
1168                         }
1169                 }
1170                 if (n_exc > 0) {
1171                         new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
1172                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1173                         /* The Phi for the memories with the exception objects */
1174                         n_exc = 0;
1175                         for (i = 0; i < arity; i++) {
1176                                 ir_node *ret;
1177                                 ret = skip_Proj(get_Block_cfgpred(end_bl, i));
1178                                 if (is_Call(ret)) {
1179                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
1180                                         n_exc++;
1181                                 } else if (is_fragile_op(ret)) {
1182                                         /* We rely that all cfops have the memory output at the same position. */
1183                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
1184                                         n_exc++;
1185                                 } else if (is_Raise(ret)) {
1186                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
1187                                         n_exc++;
1188                                 }
1189                         }
1190                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1191                 } else {
1192                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1193                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1194                 }
1195         } else {
1196                 ir_node *main_end_bl;
1197                 int main_end_bl_arity;
1198                 ir_node **end_preds;
1199
1200                 /* assert(exc_handling == 1 || no exceptions. ) */
1201                 n_exc = 0;
1202                 for (i = 0; i < arity; i++) {
1203                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1204                         ir_node *irn = skip_Proj(ret);
1205
1206                         if (is_fragile_op(irn) || is_Raise(irn)) {
1207                                 cf_pred[n_exc] = ret;
1208                                 n_exc++;
1209                         }
1210                 }
1211                 main_end_bl = get_irg_end_block(irg);
1212                 main_end_bl_arity = get_irn_arity(main_end_bl);
1213                 end_preds =  xmalloc((n_exc + main_end_bl_arity) * sizeof(*end_preds));
1214
1215                 for (i = 0; i < main_end_bl_arity; ++i)
1216                         end_preds[i] = get_irn_n(main_end_bl, i);
1217                 for (i = 0; i < n_exc; ++i)
1218                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1219                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1220                 set_Tuple_pred(call, pn_Call_X_except,  new_Bad());
1221                 set_Tuple_pred(call, pn_Call_M_except,  new_Bad());
1222                 free(end_preds);
1223         }
1224         free(res_pred);
1225         free(cf_pred);
1226
1227         /* --  Turn CSE back on. -- */
1228         set_optimize(rem_opt);
1229         current_ir_graph = rem;
1230
1231         return 1;
1232 }
1233
1234 /********************************************************************/
1235 /* Apply inlineing to small methods.                                */
1236 /********************************************************************/
1237
1238 static struct obstack  temp_obst;
1239
1240 /** Represents a possible inlinable call in a graph. */
1241 typedef struct _call_entry call_entry;
1242 struct _call_entry {
1243         ir_node    *call;      /**< the Call node */
1244         ir_graph   *callee;    /**< the callee IR-graph called here */
1245         call_entry *next;      /**< for linking the next one */
1246         int         loop_depth; /**< the loop depth of this call */
1247         unsigned    local_adr : 1;
1248 };
1249
1250 /**
1251  * environment for inlining small irgs
1252  */
1253 typedef struct _inline_env_t {
1254         struct obstack obst;  /**< an obstack where call_entries are allocated on. */
1255         call_entry *head;     /**< the head of the call entry list */
1256         call_entry *tail;     /**< the tail of the call entry list */
1257 } inline_env_t;
1258
1259 /**
1260  * Returns the irg called from a Call node. If the irg is not
1261  * known, NULL is returned.
1262  *
1263  * @param call  the call node
1264  */
1265 static ir_graph *get_call_called_irg(ir_node *call) {
1266         ir_node *addr;
1267
1268         addr = get_Call_ptr(call);
1269         if (is_Global(addr)) {
1270                 ir_entity *ent = get_Global_entity(addr);
1271                 return get_entity_irg(ent);
1272         }
1273
1274         return NULL;
1275 }
1276
1277 /**
1278  * Walker: Collect all calls to known graphs inside a graph.
1279  */
1280 static void collect_calls(ir_node *call, void *env) {
1281         if (is_Call(call)) {
1282                 ir_graph *called_irg = get_call_called_irg(call);
1283
1284                 if (called_irg != NULL) {
1285                         /* The Call node calls a locally defined method.  Remember to inline. */
1286                         inline_env_t *ienv  = env;
1287                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1288                         entry->call       = call;
1289                         entry->callee     = called_irg;
1290                         entry->next       = NULL;
1291                         entry->loop_depth = 0;
1292
1293                         if (ienv->tail == NULL)
1294                                 ienv->head = entry;
1295                         else
1296                                 ienv->tail->next = entry;
1297                         ienv->tail = entry;
1298                 }
1299         }
1300 }
1301
1302 /**
1303  * Inlines all small methods at call sites where the called address comes
1304  * from a Const node that references the entity representing the called
1305  * method.
1306  * The size argument is a rough measure for the code size of the method:
1307  * Methods where the obstack containing the firm graph is smaller than
1308  * size are inlined.
1309  */
1310 void inline_small_irgs(ir_graph *irg, int size) {
1311         ir_graph *rem = current_ir_graph;
1312         inline_env_t env;
1313         call_entry *entry;
1314
1315         current_ir_graph = irg;
1316         /* Handle graph state */
1317         assert(get_irg_phase_state(irg) != phase_building);
1318         free_callee_info(irg);
1319
1320         /* Find Call nodes to inline.
1321            (We can not inline during a walk of the graph, as inlineing the same
1322            method several times changes the visited flag of the walked graph:
1323            after the first inlineing visited of the callee equals visited of
1324            the caller.  With the next inlineing both are increased.) */
1325         obstack_init(&env.obst);
1326         env.head = env.tail = NULL;
1327         irg_walk_graph(irg, NULL, collect_calls, &env);
1328
1329         if (env.head != NULL) {
1330                 /* There are calls to inline */
1331                 collect_phiprojs(irg);
1332
1333                 for (entry = env.head; entry != NULL; entry = entry->next) {
1334                         ir_graph            *callee = entry->callee;
1335                         irg_inline_property prop    = get_irg_inline_property(callee);
1336
1337                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1338                                 /* do not inline forbidden / weak graphs */
1339                                 continue;
1340                         }
1341
1342                         if (prop >= irg_inline_forced ||
1343                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1344                                 inline_method(entry->call, callee);
1345                         }
1346                 }
1347         }
1348         obstack_free(&env.obst, NULL);
1349         current_ir_graph = rem;
1350 }
1351
1352 /**
1353  * Environment for inlining irgs.
1354  */
1355 typedef struct {
1356         unsigned n_nodes;             /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1357         unsigned n_blocks;            /**< Number of Blocks in graph without Start and End block. */
1358         unsigned n_nodes_orig;        /**< for statistics */
1359         unsigned n_call_nodes;        /**< Number of Call nodes in the graph. */
1360         unsigned n_call_nodes_orig;   /**< for statistics */
1361         unsigned n_callers;           /**< Number of known graphs that call this graphs. */
1362         unsigned n_callers_orig;      /**< for statistics */
1363         unsigned got_inline:1;   /**< Set, if at least one call inside this graph was inlined. */
1364         unsigned local_vars:1;   /**< Set, if a inlined function gets the address of an inlined variable. */
1365         unsigned recursive:1;    /**< Set, if this function is self recursive. */
1366         call_entry *call_head;   /**< The head of the list of all call nodes in this graph. */
1367         call_entry *call_tail;   /**< The tail of the list of all call nodes in this graph .*/
1368         unsigned *local_weights; /**< Once allocated, the beneficial weight for transmitting local addresses. */
1369 } inline_irg_env;
1370
1371 /**
1372  * Allocate a new environment for inlining.
1373  */
1374 static inline_irg_env *alloc_inline_irg_env(void) {
1375         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1376         env->n_nodes           = -2; /* do not count count Start, End */
1377         env->n_blocks          = -2; /* do not count count Start, End Block */
1378         env->n_nodes_orig      = -2; /* do not count Start, End */
1379         env->call_head         = NULL;
1380         env->call_tail         = NULL;
1381         env->n_call_nodes      = 0;
1382         env->n_call_nodes_orig = 0;
1383         env->n_callers         = 0;
1384         env->n_callers_orig    = 0;
1385         env->got_inline        = 0;
1386         env->local_vars        = 0;
1387         env->recursive         = 0;
1388         env->local_weights     = NULL;
1389         return env;
1390 }
1391
1392 typedef struct walker_env {
1393         inline_irg_env *x;     /**< the inline environment */
1394         char ignore_runtime;   /**< the ignore runtime flag */
1395         char ignore_callers;   /**< if set, do change callers data */
1396 } wenv_t;
1397
1398 /**
1399  * post-walker: collect all calls in the inline-environment
1400  * of a graph and sum some statistics.
1401  */
1402 static void collect_calls2(ir_node *call, void *ctx) {
1403         wenv_t         *env = ctx;
1404         inline_irg_env *x = env->x;
1405         ir_opcode      code = get_irn_opcode(call);
1406         ir_graph       *callee;
1407         call_entry     *entry;
1408
1409         /* count meaningful nodes in irg */
1410         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1411                 if (code != iro_Block) {
1412                         ++x->n_nodes;
1413                         ++x->n_nodes_orig;
1414                 } else {
1415                         ++x->n_blocks;
1416                 }
1417         }
1418
1419         if (code != iro_Call) return;
1420
1421         /* check, if it's a runtime call */
1422         if (env->ignore_runtime) {
1423                 ir_node *symc = get_Call_ptr(call);
1424
1425                 if (is_Global(symc)) {
1426                         ir_entity *ent = get_Global_entity(symc);
1427
1428                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1429                                 return;
1430                 }
1431         }
1432
1433         /* collect all call nodes */
1434         ++x->n_call_nodes;
1435         ++x->n_call_nodes_orig;
1436
1437         callee = get_call_called_irg(call);
1438         if (callee != NULL) {
1439                 if (! env->ignore_callers) {
1440                         inline_irg_env *callee_env = get_irg_link(callee);
1441                         /* count all static callers */
1442                         ++callee_env->n_callers;
1443                         ++callee_env->n_callers_orig;
1444                 }
1445                 if (callee == current_ir_graph)
1446                         x->recursive = 1;
1447
1448                 /* link it in the list of possible inlinable entries */
1449                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1450                 entry->call       = call;
1451                 entry->callee     = callee;
1452                 entry->next       = NULL;
1453                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1454
1455                 entry->next  = x->call_head;
1456                 x->call_head = entry;
1457
1458                 if (entry->next == NULL) {
1459                         /* keep tail up to date */
1460                         x->call_tail = entry;
1461                 }
1462         }
1463 }
1464
1465 /**
1466  * Returns TRUE if the number of callers is 0 in the irg's environment,
1467  * hence this irg is a leave.
1468  */
1469 INLINE static int is_leave(ir_graph *irg) {
1470         inline_irg_env *env = get_irg_link(irg);
1471         return env->n_call_nodes == 0;
1472 }
1473
1474 /**
1475  * Returns TRUE if the number of nodes in the callee is
1476  * smaller then size in the irg's environment.
1477  */
1478 INLINE static int is_smaller(ir_graph *callee, unsigned size) {
1479         inline_irg_env *env = get_irg_link(callee);
1480         return env->n_nodes < size;
1481 }
1482
1483 /**
1484  * Append the nodes of the list src to the nodes of the list in environment dst.
1485  */
1486 static void append_call_list(inline_irg_env *dst, call_entry *src) {
1487         call_entry *entry, *nentry;
1488
1489         /* Note that the src list points to Call nodes in the inlined graph, but
1490            we need Call nodes in our graph. Luckily the inliner leaves this information
1491            in the link field. */
1492         for (entry = src; entry != NULL; entry = entry->next) {
1493                 nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1494                 nentry->call         = get_irn_link(entry->call);
1495                 nentry->callee       = entry->callee;
1496                 nentry->next         = NULL;
1497                 nentry->loop_depth   = entry->loop_depth;
1498                 dst->call_tail->next = nentry;
1499                 dst->call_tail       = nentry;
1500         }
1501 }
1502
1503 static call_entry *duplicate_call_entry(const call_entry *entry,
1504                 int loop_depth_delta, ir_node *new_call)
1505 {
1506         call_entry *nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1507         nentry->call       = new_call;
1508         nentry->callee     = entry->callee;
1509         nentry->next       = NULL;
1510         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1511
1512         return nentry;
1513 }
1514
1515 /*
1516  * Inlines small leave methods at call sites where the called address comes
1517  * from a Const node that references the entity representing the called
1518  * method.
1519  * The size argument is a rough measure for the code size of the method:
1520  * Methods where the obstack containing the firm graph is smaller than
1521  * size are inlined.
1522  */
1523 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1524                 unsigned size, int ignore_runtime)
1525 {
1526         inline_irg_env   *env;
1527         ir_graph         *irg;
1528         int              i, n_irgs;
1529         ir_graph         *rem;
1530         int              did_inline;
1531         wenv_t           wenv;
1532         call_entry       *entry, *tail;
1533         const call_entry *centry;
1534         pmap             *copied_graphs;
1535         pmap_entry       *pm_entry;
1536
1537         rem = current_ir_graph;
1538         obstack_init(&temp_obst);
1539
1540         /* a map for the copied graphs, used to inline recursive calls */
1541         copied_graphs = pmap_create();
1542
1543         /* extend all irgs by a temporary data structure for inlining. */
1544         n_irgs = get_irp_n_irgs();
1545         for (i = 0; i < n_irgs; ++i)
1546                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1547
1548         /* Precompute information in temporary data structure. */
1549         wenv.ignore_runtime = ignore_runtime;
1550         wenv.ignore_callers = 0;
1551         for (i = 0; i < n_irgs; ++i) {
1552                 ir_graph *irg = get_irp_irg(i);
1553
1554                 assert(get_irg_phase_state(irg) != phase_building);
1555                 free_callee_info(irg);
1556
1557                 assure_cf_loop(irg);
1558                 wenv.x = get_irg_link(irg);
1559                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1560         }
1561
1562         /* -- and now inline. -- */
1563
1564         /* Inline leaves recursively -- we might construct new leaves. */
1565         do {
1566                 did_inline = 0;
1567
1568                 for (i = 0; i < n_irgs; ++i) {
1569                         ir_node *call;
1570                         int phiproj_computed = 0;
1571
1572                         current_ir_graph = get_irp_irg(i);
1573                         env = (inline_irg_env *)get_irg_link(current_ir_graph);
1574
1575                         tail = NULL;
1576                         for (entry = env->call_head; entry != NULL; entry = entry->next) {
1577                                 ir_graph            *callee;
1578                                 irg_inline_property  prop;
1579
1580                                 if (env->n_nodes > maxsize)
1581                                         break;
1582
1583                                 call   = entry->call;
1584                                 callee = entry->callee;
1585
1586                                 prop = get_irg_inline_property(callee);
1587                                 if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1588                                         /* do not inline forbidden / weak graphs */
1589                                         continue;
1590                                 }
1591
1592                                 if (is_leave(callee) && (
1593                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1594                                         if (!phiproj_computed) {
1595                                                 phiproj_computed = 1;
1596                                                 collect_phiprojs(current_ir_graph);
1597                                         }
1598                                         did_inline = inline_method(call, callee);
1599
1600                                         if (did_inline) {
1601                                                 inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1602
1603                                                 /* was inlined, must be recomputed */
1604                                                 phiproj_computed = 0;
1605
1606                                                 /* Do some statistics */
1607                                                 env->got_inline = 1;
1608                                                 --env->n_call_nodes;
1609                                                 env->n_nodes += callee_env->n_nodes;
1610                                                 --callee_env->n_callers;
1611
1612                                                 /* remove this call from the list */
1613                                                 if (tail != NULL)
1614                                                         tail->next = entry->next;
1615                                                 else
1616                                                         env->call_head = entry->next;
1617                                                 continue;
1618                                         }
1619                                 }
1620                                 tail = entry;
1621                         }
1622                         env->call_tail = tail;
1623                 }
1624         } while (did_inline);
1625
1626         /* inline other small functions. */
1627         for (i = 0; i < n_irgs; ++i) {
1628                 ir_node *call;
1629                 int phiproj_computed = 0;
1630
1631                 current_ir_graph = get_irp_irg(i);
1632                 env = (inline_irg_env *)get_irg_link(current_ir_graph);
1633
1634                 /* note that the list of possible calls is updated during the process */
1635                 tail = NULL;
1636                 for (entry = env->call_head; entry != NULL; entry = entry->next) {
1637                         irg_inline_property prop;
1638                         ir_graph            *callee;
1639                         pmap_entry          *e;
1640
1641                         call   = entry->call;
1642                         callee = entry->callee;
1643
1644                         prop = get_irg_inline_property(callee);
1645                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1646                                 /* do not inline forbidden / weak graphs */
1647                                 continue;
1648                         }
1649
1650                         e = pmap_find(copied_graphs, callee);
1651                         if (e != NULL) {
1652                                 /*
1653                                  * Remap callee if we have a copy.
1654                                  * FIXME: Should we do this only for recursive Calls ?
1655                                  */
1656                                 callee = e->value;
1657                         }
1658
1659                         if (prop >= irg_inline_forced ||
1660                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1661                                 if (current_ir_graph == callee) {
1662                                         /*
1663                                          * Recursive call: we cannot directly inline because we cannot walk
1664                                          * the graph and change it. So we have to make a copy of the graph
1665                                          * first.
1666                                          */
1667
1668                                         inline_irg_env *callee_env;
1669                                         ir_graph       *copy;
1670
1671                                         /*
1672                                          * No copy yet, create one.
1673                                          * Note that recursive methods are never leaves, so it is sufficient
1674                                          * to test this condition here.
1675                                          */
1676                                         copy = create_irg_copy(callee);
1677
1678                                         /* create_irg_copy() destroys the Proj links, recompute them */
1679                                         phiproj_computed = 0;
1680
1681                                         /* allocate new environment */
1682                                         callee_env = alloc_inline_irg_env();
1683                                         set_irg_link(copy, callee_env);
1684
1685                                         assure_cf_loop(copy);
1686                                         wenv.x              = callee_env;
1687                                         wenv.ignore_callers = 1;
1688                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1689
1690                                         /*
1691                                          * Enter the entity of the original graph. This is needed
1692                                          * for inline_method(). However, note that ent->irg still points
1693                                          * to callee, NOT to copy.
1694                                          */
1695                                         set_irg_entity(copy, get_irg_entity(callee));
1696
1697                                         pmap_insert(copied_graphs, callee, copy);
1698                                         callee = copy;
1699
1700                                         /* we have only one caller: the original graph */
1701                                         callee_env->n_callers      = 1;
1702                                         callee_env->n_callers_orig = 1;
1703                                 }
1704                                 if (! phiproj_computed) {
1705                                         phiproj_computed = 1;
1706                                         collect_phiprojs(current_ir_graph);
1707                                 }
1708                                 did_inline = inline_method(call, callee);
1709                                 if (did_inline) {
1710                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1711
1712                                         /* was inlined, must be recomputed */
1713                                         phiproj_computed = 0;
1714
1715                                         /* callee was inline. Append it's call list. */
1716                                         env->got_inline = 1;
1717                                         --env->n_call_nodes;
1718                                         append_call_list(env, callee_env->call_head);
1719                                         env->n_call_nodes += callee_env->n_call_nodes;
1720                                         env->n_nodes += callee_env->n_nodes;
1721                                         --callee_env->n_callers;
1722
1723                                         /* after we have inlined callee, all called methods inside callee
1724                                            are now called once more */
1725                                         for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
1726                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1727                                                 ++penv->n_callers;
1728                                         }
1729
1730                                         /* remove this call from the list */
1731                                         if (tail != NULL)
1732                                                 tail->next = entry->next;
1733                                         else
1734                                                 env->call_head = entry->next;
1735                                         continue;
1736                                 }
1737                         }
1738                         tail = entry;
1739                 }
1740                 env->call_tail = tail;
1741         }
1742
1743         for (i = 0; i < n_irgs; ++i) {
1744                 irg = get_irp_irg(i);
1745                 env = (inline_irg_env *)get_irg_link(irg);
1746
1747                 if (env->got_inline) {
1748                         optimize_graph_df(irg);
1749                         optimize_cf(irg);
1750                 }
1751                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1752                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1753                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1754                         env->n_callers_orig, env->n_callers,
1755                         get_entity_name(get_irg_entity(irg))));
1756                 }
1757         }
1758
1759         /* kill the copied graphs: we don't need them anymore */
1760         foreach_pmap(copied_graphs, pm_entry) {
1761                 ir_graph *copy = pm_entry->value;
1762
1763                 /* reset the entity, otherwise it will be deleted in the next step ... */
1764                 set_irg_entity(copy, NULL);
1765                 free_ir_graph(copy);
1766         }
1767         pmap_destroy(copied_graphs);
1768
1769         obstack_free(&temp_obst, NULL);
1770         current_ir_graph = rem;
1771 }
1772
1773 /**
1774  * Calculate the parameter weights for transmitting the address of a local variable.
1775  */
1776 static unsigned calc_method_local_weight(ir_node *arg) {
1777         int      i, j, k;
1778         unsigned v, weight = 0;
1779
1780         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1781                 ir_node *succ = get_irn_out(arg, i);
1782
1783                 switch (get_irn_opcode(succ)) {
1784                 case iro_Load:
1785                 case iro_Store:
1786                         /* Loads and Store can be removed */
1787                         weight += 3;
1788                         break;
1789                 case iro_Sel:
1790                         /* check if all args are constant */
1791                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1792                                 ir_node *idx = get_Sel_index(succ, j);
1793                                 if (! is_Const(idx))
1794                                         return 0;
1795                         }
1796                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1797                            some unsupported node. */
1798                         v = calc_method_local_weight(succ);
1799                         if (v == 0)
1800                                 return 0;
1801                         /* we can kill one Sel with constant indexes, this is cheap */
1802                         weight += v + 1;
1803                         break;
1804                 case iro_Id:
1805                         /* when looking backward we might find Id nodes */
1806                         weight += calc_method_local_weight(succ);
1807                         break;
1808                 case iro_Tuple:
1809                         /* unoptimized tuple */
1810                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1811                                 ir_node *pred = get_Tuple_pred(succ, j);
1812                                 if (pred == arg) {
1813                                         /* look for Proj(j) */
1814                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1815                                                 ir_node *succ_succ = get_irn_out(succ, k);
1816                                                 if (is_Proj(succ_succ)) {
1817                                                         if (get_Proj_proj(succ_succ) == j) {
1818                                                                 /* found */
1819                                                                 weight += calc_method_local_weight(succ_succ);
1820                                                         }
1821                                                 } else {
1822                                                         /* this should NOT happen */
1823                                                         return 0;
1824                                                 }
1825                                         }
1826                                 }
1827                         }
1828                         break;
1829                 default:
1830                         /* any other node: unsupported yet or bad. */
1831                         return 0;
1832                 }
1833         }
1834         return weight;
1835 }
1836
1837 /**
1838  * Calculate the parameter weights for transmitting the address of a local variable.
1839  */
1840 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1841         ir_entity *ent = get_irg_entity(irg);
1842         ir_type  *mtp;
1843         int      nparams, i, proj_nr;
1844         ir_node  *irg_args, *arg;
1845
1846         mtp      = get_entity_type(ent);
1847         nparams  = get_method_n_params(mtp);
1848
1849         /* allocate a new array. currently used as 'analysed' flag */
1850         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1851
1852         /* If the method haven't parameters we have nothing to do. */
1853         if (nparams <= 0)
1854                 return;
1855
1856         assure_irg_outs(irg);
1857         irg_args = get_irg_args(irg);
1858         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1859                 arg     = get_irn_out(irg_args, i);
1860                 proj_nr = get_Proj_proj(arg);
1861                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1862         }
1863 }
1864
1865 /**
1866  * Calculate the benefice for transmitting an local variable address.
1867  * After inlining, the local variable might be transformed into a
1868  * SSA variable by scalar_replacement().
1869  */
1870 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1871         inline_irg_env *env = get_irg_link(callee);
1872
1873         if (env->local_weights != NULL) {
1874                 if (pos < ARR_LEN(env->local_weights))
1875                         return env->local_weights[pos];
1876                 return 0;
1877         }
1878
1879         analyze_irg_local_weights(env, callee);
1880
1881         if (pos < ARR_LEN(env->local_weights))
1882                 return env->local_weights[pos];
1883         return 0;
1884 }
1885
1886 /**
1887  * Calculate a benefice value for inlining the given call.
1888  *
1889  * @param call       the call node we have to inspect
1890  * @param callee     the called graph
1891  * @param local_adr  set after return if an address of a local variable is
1892  *                   transmitted as a parameter
1893  */
1894 static int calc_inline_benefice(const call_entry *entry, ir_graph *callee,
1895                                 unsigned *local_adr)
1896 {
1897         ir_node   *call = entry->call;
1898         ir_entity *ent  = get_irg_entity(callee);
1899         ir_node   *frame_ptr;
1900         ir_type   *mtp;
1901         int       weight = 0;
1902         int       i, n_params, all_const;
1903         unsigned  cc, v;
1904         irg_inline_property prop;
1905
1906         inline_irg_env *callee_env;
1907
1908         prop = get_irg_inline_property(callee);
1909         if (prop == irg_inline_forbidden) {
1910                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1911                     call, callee));
1912                 return INT_MIN;
1913         }
1914
1915         if ( (get_irg_additional_properties(callee)
1916                                 | get_entity_additional_properties(ent))
1917                         & (mtp_property_noreturn | mtp_property_weak)) {
1918                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1919                     call, callee));
1920                 return INT_MIN;
1921         }
1922
1923         /* costs for every passed parameter */
1924         n_params = get_Call_n_params(call);
1925         mtp      = get_entity_type(ent);
1926         cc       = get_method_calling_convention(mtp);
1927         if (cc & cc_reg_param) {
1928                 /* register parameter, smaller costs for register parameters */
1929                 int max_regs = cc & ~cc_bits;
1930
1931                 if (max_regs < n_params)
1932                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1933                 else
1934                         weight += n_params * 2;
1935         } else {
1936                 /* parameters are passed an stack */
1937                 weight += 5 * n_params;
1938         }
1939
1940         /* constant parameters improve the benefice */
1941         frame_ptr = get_irg_frame(current_ir_graph);
1942         all_const = 1;
1943         for (i = 0; i < n_params; ++i) {
1944                 ir_node *param = get_Call_param(call, i);
1945
1946                 if (is_Const(param)) {
1947                         weight += get_method_param_weight(ent, i);
1948                 } else {
1949                         all_const = 0;
1950                         if (is_SymConst(param))
1951                                 weight += get_method_param_weight(ent, i);
1952                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1953                                 /*
1954                                  * An address of a local variable is transmitted. After
1955                                  * inlining, scalar_replacement might be able to remove the
1956                                  * local variable, so honor this.
1957                                  */
1958                                 v = get_method_local_adress_weight(callee, i);
1959                                 weight += v;
1960                                 if (v > 0)
1961                                         *local_adr = 1;
1962                         }
1963                 }
1964         }
1965
1966         callee_env = get_irg_link(callee);
1967         if (callee_env->n_callers == 1
1968                         && callee != current_ir_graph
1969                         && get_entity_visibility(ent) == visibility_local) {
1970                 weight += 700;
1971         }
1972
1973         /* give a bonus for functions with one block */
1974         if (callee_env->n_blocks == 1)
1975                 weight = weight * 3 / 2;
1976
1977         /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
1978         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1979                 weight += 2000;
1980
1981         /* and finally for leaves: they do not increase the register pressure
1982            because of callee safe registers */
1983         if (callee_env->n_call_nodes == 0)
1984                 weight += 400;
1985
1986         /*
1987          * Reduce the weight for recursive function IFF not all arguments are const.
1988          * inlining recursive functions is rarely good.
1989          */
1990         if (callee_env->recursive && !all_const)
1991                 weight -= 2000;
1992
1993         /** it's important to inline inner loops first */
1994         if (entry->loop_depth > 30)
1995                 weight += 30 * 1024;
1996         else
1997                 weight += entry->loop_depth * 1024;
1998
1999         /*
2000          * All arguments constant is probably a good sign, give an extra bonus
2001          */
2002         if (all_const)
2003                 weight += 1308;
2004
2005         return weight;
2006 }
2007
2008 static ir_graph **irgs;
2009 static int      last_irg;
2010
2011 static void callgraph_walker(ir_graph *irg, void *data)
2012 {
2013         (void) data;
2014         irgs[last_irg++] = irg;
2015 }
2016
2017 static ir_graph **create_irg_list(void)
2018 {
2019         ir_entity **free_methods;
2020         int       arr_len;
2021         int       n_irgs = get_irp_n_irgs();
2022
2023         cgana(&arr_len, &free_methods);
2024         xfree(free_methods);
2025
2026         compute_callgraph();
2027
2028         last_irg = 0;
2029         irgs     = xmalloc(n_irgs * sizeof(*irgs));
2030         memset(irgs, 0, sizeof(n_irgs * sizeof(*irgs)));
2031
2032         callgraph_walk(NULL, callgraph_walker, NULL);
2033         assert(n_irgs == last_irg);
2034
2035         return irgs;
2036 }
2037
2038 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2039                             int inline_threshold)
2040 {
2041         ir_graph            *callee    = call->callee;
2042         irg_inline_property  prop      = get_irg_inline_property(callee);
2043         unsigned             local_adr;
2044         int                  benefice;
2045
2046         benefice        = calc_inline_benefice(call, callee, &local_adr);
2047         call->local_adr = local_adr;
2048
2049         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2050                 get_irn_irg(call->call), call->call, callee, benefice));
2051
2052         if (benefice < inline_threshold && !(prop & irg_inline_forced))
2053                 return;
2054
2055         pqueue_put(pqueue, call, benefice);
2056 }
2057
2058 static void inline_into(ir_graph *irg, unsigned maxsize,
2059                 int inline_threshold, pmap *copied_graphs)
2060 {
2061         int             phiproj_computed = 0;
2062         inline_irg_env *env = get_irg_link(irg);
2063         call_entry     *curr_call;
2064         wenv_t          wenv;
2065
2066         if (env->n_nodes > maxsize) {
2067                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2068                 return;
2069         }
2070
2071         current_ir_graph = irg;
2072
2073         /* put irgs into the pqueue */
2074         pqueue_t *pqueue = new_pqueue();
2075
2076         for (curr_call = env->call_head; curr_call != NULL;
2077                         curr_call = curr_call->next) {
2078
2079                 if (is_Tuple(curr_call->call))
2080                         continue;
2081                 assert(is_Call(curr_call->call));
2082
2083                 maybe_push_call(pqueue, curr_call, inline_threshold);
2084         }
2085
2086         /* note that the list of possible calls is updated during the process */
2087         while (!pqueue_empty(pqueue)) {
2088                 int                  did_inline;
2089                 pmap_entry          *e;
2090                 call_entry          *curr_call = pqueue_pop_front(pqueue);
2091                 ir_graph            *callee    = curr_call->callee;
2092                 ir_node             *call_node = curr_call->call;
2093                 irg_inline_property  prop      = get_irg_inline_property(callee);
2094                 const call_entry    *centry;
2095                 inline_irg_env      *callee_env = get_irg_link(callee);
2096                 int                  depth      = curr_call->loop_depth;
2097
2098                 if (! (prop & irg_inline_forced)
2099                                 && env->n_nodes + callee_env->n_nodes > maxsize) {
2100                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2101                                                 env->n_nodes, callee, callee_env->n_nodes));
2102                         continue;
2103                 }
2104
2105                 e = pmap_find(copied_graphs, callee);
2106                 if (e != NULL) {
2107                         /*
2108                         * Remap callee if we have a copy.
2109                         * FIXME: Should we do this only for recursive Calls ?
2110                         */
2111                         callee = e->value;
2112                 }
2113
2114                 if (current_ir_graph == callee) {
2115                         /*
2116                          * Recursive call: we cannot directly inline because we cannot
2117                          * walk the graph and change it. So we have to make a copy of
2118                          * the graph first.
2119                          */
2120                         inline_irg_env *callee_env;
2121                         ir_graph       *copy;
2122
2123                         /*
2124                          * No copy yet, create one.
2125                          * Note that recursive methods are never leaves, so it is
2126                          * sufficient to test this condition here.
2127                          */
2128                         copy = create_irg_copy(callee);
2129
2130                         /* create_irg_copy() destroys the Proj links, recompute them */
2131                         phiproj_computed = 0;
2132
2133                         /* allocate new environment */
2134                         callee_env = alloc_inline_irg_env();
2135                         set_irg_link(copy, callee_env);
2136
2137                         assure_cf_loop(copy);
2138                         wenv.x              = callee_env;
2139                         wenv.ignore_callers = 1;
2140                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2141
2142                         /*
2143                          * Enter the entity of the original graph. This is needed
2144                          * for inline_method(). However, note that ent->irg still points
2145                          * to callee, NOT to copy.
2146                          */
2147                         set_irg_entity(copy, get_irg_entity(callee));
2148
2149                         pmap_insert(copied_graphs, callee, copy);
2150                         callee = copy;
2151
2152                         /* we have only one caller: the original graph */
2153                         callee_env->n_callers      = 1;
2154                         callee_env->n_callers_orig = 1;
2155                 }
2156                 if (! phiproj_computed) {
2157                         phiproj_computed = 1;
2158                         collect_phiprojs(current_ir_graph);
2159                 }
2160                 did_inline = inline_method(call_node, callee);
2161                 if (!did_inline)
2162                         continue;
2163
2164                 /* was inlined, must be recomputed */
2165                 phiproj_computed = 0;
2166
2167                 /* after we have inlined callee, all called methods inside
2168                  * callee are now called once more */
2169                 for (centry = callee_env->call_head; centry != NULL;
2170                                 centry = centry->next) {
2171                         inline_irg_env *penv = get_irg_link(centry->callee);
2172                         ++penv->n_callers;
2173                 }
2174
2175                 /* callee was inline. Append it's call list. */
2176                 env->got_inline = 1;
2177                 if (curr_call->local_adr)
2178                         env->local_vars = 1;
2179                 --env->n_call_nodes;
2180
2181                 /* we just generate a bunch of new calls */
2182                 for (centry = callee_env->call_head; centry != NULL;
2183                                 centry = centry->next) {
2184                         /* Note that the src list points to Call nodes in the inlined graph,
2185                          * but we need Call nodes in our graph. Luckily the inliner leaves
2186                          * this information in the link field. */
2187
2188                         ir_node    *new_call = get_irn_link(centry->call);
2189                         call_entry *new_entry;
2190
2191                         if (!is_Call(new_call))
2192                                 continue;
2193
2194                         new_entry = duplicate_call_entry(centry, depth, new_call);
2195                         maybe_push_call(pqueue, new_entry, inline_threshold);
2196                 }
2197
2198                 env->n_call_nodes += callee_env->n_call_nodes;
2199                 env->n_nodes += callee_env->n_nodes;
2200                 --callee_env->n_callers;
2201         }
2202
2203         del_pqueue(pqueue);
2204 }
2205
2206 /**
2207  * Heuristic inliner. Calculates a benefice value for every call and inlines
2208  * those calls with a value higher than the threshold.
2209  */
2210 void inline_functions(unsigned maxsize, int inline_threshold) {
2211         inline_irg_env   *env;
2212         int              i, n_irgs;
2213         ir_graph         *rem;
2214         wenv_t           wenv;
2215         pmap             *copied_graphs;
2216         pmap_entry       *pm_entry;
2217         ir_graph         **irgs;
2218
2219         rem = current_ir_graph;
2220         obstack_init(&temp_obst);
2221
2222         irgs = create_irg_list();
2223
2224         /* a map for the copied graphs, used to inline recursive calls */
2225         copied_graphs = pmap_create();
2226
2227         /* extend all irgs by a temporary data structure for inlining. */
2228         n_irgs = get_irp_n_irgs();
2229         for (i = 0; i < n_irgs; ++i)
2230                 set_irg_link(irgs[i], alloc_inline_irg_env());
2231
2232         /* Precompute information in temporary data structure. */
2233         wenv.ignore_runtime = 0;
2234         wenv.ignore_callers = 0;
2235         for (i = 0; i < n_irgs; ++i) {
2236                 ir_graph *irg = irgs[i];
2237
2238                 free_callee_info(irg);
2239
2240                 wenv.x         = get_irg_link(irg);
2241                 assure_cf_loop(irg);
2242                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2243         }
2244
2245         /* -- and now inline. -- */
2246         for (i = 0; i < n_irgs; ++i) {
2247                 ir_graph *irg = irgs[i];
2248
2249                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2250         }
2251
2252         for (i = 0; i < n_irgs; ++i) {
2253                 ir_graph *irg = irgs[i];
2254
2255                 env = get_irg_link(irg);
2256                 if (env->got_inline) {
2257                         /* this irg got calls inlined: optimize it */
2258
2259                         if (0) {
2260                                 /* scalar replacement does not work well with Tuple nodes, so optimize them away */
2261                                 optimize_graph_df(irg);
2262
2263                                 if (env->local_vars) {
2264                                         if (scalar_replacement_opt(irg)) {
2265                                                 optimize_graph_df(irg);
2266                                         }
2267                                 }
2268                                 optimize_cf(irg);
2269                         } else {
2270                                 if (env->local_vars) {
2271                                         scalar_replacement_opt(irg);
2272                                 }
2273                                 combo(irg);
2274                         }
2275                 }
2276                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2277                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2278                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2279                         env->n_callers_orig, env->n_callers,
2280                         get_entity_name(get_irg_entity(irg))));
2281                 }
2282         }
2283
2284         /* kill the copied graphs: we don't need them anymore */
2285         foreach_pmap(copied_graphs, pm_entry) {
2286                 ir_graph *copy = pm_entry->value;
2287
2288                 /* reset the entity, otherwise it will be deleted in the next step ... */
2289                 set_irg_entity(copy, NULL);
2290                 free_ir_graph(copy);
2291         }
2292         pmap_destroy(copied_graphs);
2293
2294         xfree(irgs);
2295
2296         obstack_free(&temp_obst, NULL);
2297         current_ir_graph = rem;
2298 }
2299
2300 void firm_init_inline(void) {
2301         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2302 }