/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   Dead node elimination and Procedure Inlining.
 * @author  Michael Beck, Goetz Lindenmaier
 */
#include "irgraph_t.h"
#include "iroptimize.h"
#include "irbackedge_t.h"
#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "iropt_dbg.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/*------------------------------------------------------------------*/
/**
 * Remember the new node in the old node by using a field all nodes have.
 */
#define set_new_node(oldn, newn)  set_irn_link(oldn, newn)

/**
 * Get this new node, before the old node is forgotten.
 */
#define get_new_node(oldn)  get_irn_link(oldn)

/**
 * Check whether a new node was set.
 */
#define has_new_node(n)  (get_new_node(n) != NULL)
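/* Illustrative sketch (added for exposition, not original code): during the
 * copying garbage collection the link field carries the old->new mapping:
 *
 *   set_new_node(oldn, newn);          // copy_node() remembers the copy
 *   ...
 *   ir_node *nn = get_new_node(oldn);  // copy_preds() rewires via the map
 *
 * The mapping is only valid between the copy_node() and copy_preds() phases
 * of a walk.
 */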
/**
 * We use the block_visited flag to mark that we have computed the
 * number of useful predecessors for this block.
 * Further we encode the new arity in this flag in the old blocks.
 * Remembering the arity is useful, as it saves a lot of pointer
 * accesses. This function is called for all Phi and Block nodes
 * in a Block.
 */
static inline int compute_new_arity(ir_node *b)
{
	int i, res, irn_arity;
	int irg_v, block_v;

	irg_v   = get_irg_block_visited(current_ir_graph);
	block_v = get_Block_block_visited(b);
	if (block_v >= irg_v) {
		/* we computed the number of preds for this block and saved it in the
		   block_v flag */
		return block_v - irg_v;
	} else {
		/* compute the number of good predecessors */
		res = irn_arity = get_irn_arity(b);
		for (i = 0; i < irn_arity; i++)
			if (is_Bad(get_irn_n(b, i))) res--;
		/* save it in the flag. */
		set_Block_block_visited(b, irg_v + res);
		return res;
	}
}
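/* Example (for exposition): a block with predecessors { Jmp, Bad, Jmp } has
 * arity 3 but only 2 useful predecessors, so compute_new_arity() returns 2
 * and encodes it as block_visited = irg_block_visited + 2. */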
/**
 * Copies the node to the new obstack. The Ins of the new node point to
 * the predecessors on the old obstack.  For block/phi nodes not all
 * predecessors might be copied.  n->link points to the new node.
 * For Phi and Block nodes the function allocates in-arrays with an arity
 * only for useful predecessors.  The arity is determined by counting
 * the non-bad predecessors of the block.
 *
 * @param n    The node to be copied
 * @param env  if non-NULL, the node number attribute will be copied to the new node
 *
 * Note: Also used for loop unrolling.
 */
static void copy_node(ir_node *n, void *env)
{
	ir_node *nn, *block;
	int new_arity;
	ir_op *op = get_irn_op(n);

	if (has_new_node(n)) {
		/* node copied already */
		return;
	} else if (op == op_Block) {
		block = NULL;
		new_arity = compute_new_arity(n);
		n->attr.block.graph_arr = NULL;
	} else {
		block = get_nodes_block(n);
		if (op == op_Phi) {
			new_arity = compute_new_arity(block);
		} else {
			new_arity = get_irn_arity(n);
		}
	}
	nn = new_ir_node(get_irn_dbg_info(n),
	                 current_ir_graph,
	                 block,
	                 op,
	                 get_irn_mode(n),
	                 new_arity,
	                 get_irn_in(n) + 1);
	/* Copy the attributes.  These might point to additional data.  If this
	   was allocated on the old obstack the pointers now are dangling.  This
	   frees e.g. the memory of the graph_arr allocated in new_immBlock. */
	if (op == op_Block) {
		/* we cannot allow blocks WITHOUT macroblock input */
		set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
	}
	copy_node_attr(n, nn);

#ifdef DEBUG_libfirm
	if (env != NULL) {
		/* for easier debugging, we want to copy the node numbers too */
		nn->node_nr = n->node_nr;
	}
#endif

	set_new_node(n, nn);
	hook_dead_node_elim_subst(current_ir_graph, n, nn);
}
/**
 * Copies new predecessors of old node to new node remembered in link.
 * Spare the Bad predecessors of Phi and Block nodes.
 */
static void copy_preds(ir_node *n, void *env)
{
	ir_node *nn, *block;
	int i, j, irn_arity;
	(void) env;

	nn = get_new_node(n);

	if (is_Block(n)) {
		/* copy the macro block header */
		ir_node *mbh = get_Block_MacroBlock(n);

		if (mbh == n) {
			/* this block is a macroblock header */
			set_Block_MacroBlock(nn, nn);
		} else {
			/* get the macro block header */
			ir_node *nmbh = get_new_node(mbh);
			assert(nmbh != NULL);
			set_Block_MacroBlock(nn, nmbh);
		}

		/* Don't copy Bad nodes. */
		j = 0;
		irn_arity = get_irn_arity(n);
		for (i = 0; i < irn_arity; i++) {
			if (! is_Bad(get_irn_n(n, i))) {
				ir_node *pred = get_irn_n(n, i);
				set_irn_n(nn, j, get_new_node(pred));
				j++;
			}
		}
		/* repair the block visited flag from above misuse. Repair it in both
		   graphs so that the old one can still be used. */
		set_Block_block_visited(nn, 0);
		set_Block_block_visited(n, 0);
		/* Local optimization could not merge two subsequent blocks if
		   in array contained Bads.  Now it's possible.
		   We don't call optimize_in_place as it requires
		   that the fields in ir_graph are set properly. */
		if (!has_Block_entity(nn) &&
		    get_opt_control_flow_straightening() &&
		    get_Block_n_cfgpreds(nn) == 1 &&
		    is_Jmp(get_Block_cfgpred(nn, 0))) {
			ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
			if (nn == old) {
				/* Jmp jumps into the block it is in -- deal with this self-cycle. */
				assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
				exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
			} else {
				exchange(nn, old);
			}
		}
	} else if (is_Phi(n) && get_irn_arity(n) > 0) {
		/* Don't copy node if corresponding predecessor in block is Bad.
		   The Block itself should not be Bad. */
		block = get_nodes_block(n);
		set_nodes_block(nn, get_new_node(block));
		j = 0;
		irn_arity = get_irn_arity(n);
		for (i = 0; i < irn_arity; i++) {
			if (! is_Bad(get_irn_n(block, i))) {
				ir_node *pred = get_irn_n(n, i);
				set_irn_n(nn, j, get_new_node(pred));
				/*if (is_backedge(n, i)) set_backedge(nn, j);*/
				j++;
			}
		}
		/* If the pre walker reached this Phi after the post walker visited the
		   block, block_visited is > 0. */
		set_Block_block_visited(get_nodes_block(n), 0);
		/* Compacting the Phi's ins might generate Phis with only one
		   predecessor. */
		if (get_irn_arity(nn) == 1)
			exchange(nn, get_irn_n(nn, 0));
	} else {
		irn_arity = get_irn_arity(n);
		for (i = -1; i < irn_arity; i++)
			set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
	}
	/* Now the new node is complete. We can add it to the hash table for CSE.
	   @@@ inlining aborts if we identify End. Why? */
	if (!is_End(nn))
		add_identities(current_ir_graph->value_table, nn);
}
/**
 * Copies the graph recursively, compacts the keep-alives of the end node.
 *
 * @param irg           the graph to be copied
 * @param copy_node_nr  If non-zero, the node number will be copied
 */
static void copy_graph(ir_graph *irg, int copy_node_nr)
{
	ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
	ir_node *ka;      /* keep alive */
	int i, irn_arity;
	unsigned long vfl;

	/* Some nodes must be copied by hand, sigh */
	vfl = get_irg_visited(irg);
	set_irg_visited(irg, vfl + 1);

	oe = get_irg_end(irg);
	mark_irn_visited(oe);
	/* copy the end node by hand, allocate dynamic in array! */
	ne = new_ir_node(get_irn_dbg_info(oe),
	                 irg,
	                 NULL,
	                 op_End,
	                 mode_X,
	                 -1,
	                 NULL);
	/* Copy the attributes.  Well, there might be some in the future... */
	copy_node_attr(oe, ne);
	set_new_node(oe, ne);

	/* copy the Bad node */
	ob = get_irg_bad(irg);
	mark_irn_visited(ob);
	nb = new_ir_node(get_irn_dbg_info(ob),
	                 irg,
	                 NULL,
	                 op_Bad,
	                 mode_T,
	                 0,
	                 NULL);
	copy_node_attr(ob, nb);
	set_new_node(ob, nb);

	/* copy the NoMem node */
	om = get_irg_no_mem(irg);
	mark_irn_visited(om);
	nm = new_ir_node(get_irn_dbg_info(om),
	                 irg,
	                 NULL,
	                 op_NoMem,
	                 mode_M,
	                 0,
	                 NULL);
	copy_node_attr(om, nm);
	set_new_node(om, nm);

	/* copy the live nodes */
	set_irg_visited(irg, vfl);
	irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));

	/* Note: from now on, the visited flag of the graph is equal to vfl + 1 */

	/* visit the anchors as well */
	for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
		ir_node *n = get_irg_anchor(irg, i);

		if (n && (get_irn_visited(n) <= vfl)) {
			set_irg_visited(irg, vfl);
			irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
		}
	}

	/* copy_preds for the end node ... */
	set_nodes_block(ne, get_new_node(get_nodes_block(oe)));

	/*- ... and now the keep alives. -*/
	/* First pick the not marked block nodes and walk them.  We must pick these
	   first, as otherwise we will overlook blocks reachable from Phis. */
	irn_arity = get_End_n_keepalives(oe);
	for (i = 0; i < irn_arity; i++) {
		ka = get_End_keepalive(oe, i);
		if (is_Block(ka)) {
			if (get_irn_visited(ka) <= vfl) {
				/* We must keep the block alive and copy everything reachable */
				set_irg_visited(irg, vfl);
				irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
			}
			add_End_keepalive(ne, get_new_node(ka));
		}
	}

	/* Now pick other nodes.  Here we will keep all! */
	irn_arity = get_End_n_keepalives(oe);
	for (i = 0; i < irn_arity; i++) {
		ka = get_End_keepalive(oe, i);
		if (!is_Block(ka)) {
			if (get_irn_visited(ka) <= vfl) {
				/* We didn't copy the node yet. */
				set_irg_visited(irg, vfl);
				irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
			}
			add_End_keepalive(ne, get_new_node(ka));
		}
	}

	/* start block sometimes only reached after keep alives */
	set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
	set_nodes_block(nm, get_new_node(get_nodes_block(om)));
}
/**
 * Copies the graph reachable from current_ir_graph->end to the obstack
 * in current_ir_graph and fixes the environment.
 * Then fixes the fields in current_ir_graph containing nodes of the
 * graph.
 *
 * @param copy_node_nr  If non-zero, the node number will be copied
 */
static void copy_graph_env(int copy_node_nr)
{
	ir_graph *irg = current_ir_graph;
	ir_node *old_end, *new_anchor;
	int i;

	/* remove end_except and end_reg nodes */
	old_end = get_irg_end(irg);
	set_irg_end_except(irg, old_end);
	set_irg_end_reg   (irg, old_end);

	/* Not all nodes remembered in irg might be reachable
	   from the end node.  Assure their link is set to NULL, so that
	   we can test whether new nodes have been computed. */
	for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
		ir_node *n = get_irg_anchor(irg, i);
		if (n != NULL)
			set_new_node(n, NULL);
	}
	/* we use the block walk flag for removing Bads from Blocks ins. */
	inc_irg_block_visited(irg);

	/* copy the graph */
	copy_graph(irg, copy_node_nr);

	/* fix the anchor */
	old_end    = get_irg_end(irg);
	new_anchor = new_Anchor(irg);

	for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
		ir_node *n = get_irg_anchor(irg, i);
		if (n)
			set_irn_n(new_anchor, i, get_new_node(n));
	}
	free_End(old_end);
	irg->anchor = new_anchor;

	/* ensure the new anchor is placed in the endblock */
	set_nodes_block(new_anchor, get_irg_end_block(irg));
}
/**
 * Copies all reachable nodes to a new obstack.  Removes bad inputs
 * from block nodes and the corresponding inputs from Phi nodes.
 * Merges single exit blocks with single entry blocks and removes
 * 1-input Phis.
 * Adds all new nodes to a new hash table for CSE.  Does not
 * perform CSE, so the hash table might contain common subexpressions.
 */
void dead_node_elimination(ir_graph *irg)
{
	ir_graph *rem;
#ifdef INTERPROCEDURAL_VIEW
	int rem_ipview = get_interprocedural_view();
#endif
	struct obstack *graveyard_obst = NULL;
	struct obstack *rebirth_obst   = NULL;

	edges_deactivate(irg);

	/* inform statistics that we started a dead-node elimination run */
	hook_dead_node_elim(irg, 1);

	/* Remember external state of current_ir_graph. */
	rem = current_ir_graph;
	current_ir_graph = irg;
#ifdef INTERPROCEDURAL_VIEW
	set_interprocedural_view(0);
#endif

	assert(get_irg_phase_state(irg) != phase_building);

	/* Handle graph state */
	free_callee_info(irg);

	/* @@@ so far we lose loops when copying */
	free_loop_information(irg);

	set_irg_doms_inconsistent(irg);

	/* A quiet place, where the old obstack can rest in peace,
	   until it will be cremated. */
	graveyard_obst = irg->obst;

	/* A new obstack, where the reachable nodes will be copied to. */
	rebirth_obst = XMALLOC(struct obstack);
	irg->obst = rebirth_obst;
	obstack_init(irg->obst);
	irg->last_node_idx = 0;

	/* We also need a new value table for CSE */
	del_identities(irg->value_table);
	irg->value_table = new_identities();

	/* Copy the graph from the old to the new obstack */
	copy_graph_env(/*copy_node_nr=*/1);

	/* Free memory from old unoptimized obstack */
	obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
	xfree(graveyard_obst);            /* ... then free it.           */

	/* inform statistics that the run is over */
	hook_dead_node_elim(irg, 0);

	current_ir_graph = rem;
#ifdef INTERPROCEDURAL_VIEW
	set_interprocedural_view(rem_ipview);
#endif
}
ir_graph_pass_t *dead_node_elimination_pass(const char *name)
{
	return def_graph_pass(name ? name : "dce", dead_node_elimination);
}
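/* Usage sketch (for exposition, assuming an ir_graph *irg obtained from the
 * program): either call the optimization directly,
 *
 *   dead_node_elimination(irg);
 *
 * or schedule it in a pass manager via dead_node_elimination_pass(NULL),
 * which registers it under the default name "dce". */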
/**
 * Relink bad predecessors of a block and store the old in array
 * in the link field. This function is called by relink_bad_predecessors().
 * The array in the link field starts with the block operand at position 0.
 * If the block has bad predecessors, create a new in array without bad preds.
 * Otherwise leave the in array untouched.
 */
static void relink_bad_block_predecessors(ir_node *n, void *env)
{
	ir_node **new_in, *irn;
	int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
	(void) env;

	/* if link field of block is NULL, look for bad predecessors otherwise
	   this is already done */
	if (is_Block(n) && get_irn_link(n) == NULL) {
		/* save old predecessors in link field (position 0 is the block operand)*/
		set_irn_link(n, get_irn_in(n));

		/* count predecessors without bad nodes */
		old_irn_arity = get_irn_arity(n);
		for (i = 0; i < old_irn_arity; i++)
			if (!is_Bad(get_irn_n(n, i)))
				++new_irn_arity;

		/* arity changing: set new predecessors without bad nodes */
		if (new_irn_arity < old_irn_arity) {
			/* Get new predecessor array. We do not resize the array, as we must
			   keep the old one to update Phis. */
			new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));

			/* set new predecessors in array */
			new_in[0] = NULL;
			new_irn_n = 1;
			for (i = 0; i < old_irn_arity; i++) {
				irn = get_irn_n(n, i);
				if (!is_Bad(irn)) {
					new_in[new_irn_n] = irn;
					is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
					++new_irn_n;
				}
			}
			/* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
			ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
			n->in = new_in;
		} /* ir node has bad predecessors */
	} /* Block is not relinked */
}
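/* Example (for exposition): a block with in array [ NULL, P0, Bad, P2 ]
 * (position 0 is the block operand) gets the new in array [ NULL, P0, P2 ];
 * the old array stays reachable through the link field so that the Phi
 * nodes of the block can be compacted with the same mapping. */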
/**
 * Relinks Bad predecessors of Blocks and Phis, called by the walker in
 * remove_bad_predecessors().  If n is a Block, call
 * relink_bad_block_predecessors().  If n is a Phi node, also call the relinking
 * function on the Phi's Block. If this block has bad predecessors, relink the
 * preds of the Phi node.
 */
static void relink_bad_predecessors(ir_node *n, void *env)
{
	ir_node *block, **old_in;
	int i, old_irn_arity, new_irn_arity;

	/* relink bad predecessors of a block */
	if (is_Block(n))
		relink_bad_block_predecessors(n, env);

	/* If Phi node relink its block and its predecessors */
	if (is_Phi(n)) {
		/* Relink predecessors of phi's block */
		block = get_nodes_block(n);
		if (get_irn_link(block) == NULL)
			relink_bad_block_predecessors(block, env);

		old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
		old_irn_arity = ARR_LEN(old_in);

		/* Relink Phi predecessors if count of predecessors changed */
		if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
			/* set new predecessors in array
			   n->in[0] remains the same block */
			new_irn_arity = 1;
			for (i = 1; i < old_irn_arity; i++)
				if (!is_Bad(old_in[i])) {
					n->in[new_irn_arity] = n->in[i];
					is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
					++new_irn_arity;
				}

			ARR_SETLEN(ir_node *, n->in, new_irn_arity);
			ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
		}
	} /* n is a Phi node */
}
/**
 * Removes Bad predecessors from Blocks and the corresponding
 * inputs to Phi nodes as in dead_node_elimination but without
 * copying the graph.
 * On walking up set the link field to NULL, on walking down call
 * relink_bad_predecessors() (This function stores the old in array
 * in the link field and sets a new in array if the arity of the
 * predecessors changes).
 */
void remove_bad_predecessors(ir_graph *irg)
{
	panic("Fix backedge handling first");
	irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
}
/*
   The following stuff implements a facility ("survive DCE") that automatically
   patches registered ir_node pointers to the new node when a dead node
   elimination occurs.
*/
struct _survive_dce_t {
	struct obstack obst;
	pmap           *places;
	pmap           *new_places;
	hook_entry_t   dead_node_elim;
	hook_entry_t   dead_node_elim_subst;
};

typedef struct _survive_dce_list_t {
	struct _survive_dce_list_t *next;
	ir_node                    **place;
} survive_dce_list_t;
/**
 * Hook called before and after a dead node elimination run.
 */
static void dead_node_hook(void *context, ir_graph *irg, int start)
{
	survive_dce_t *sd = context;
	(void) irg;

	if (start) {
		/* Create a new map before the dead node elimination is performed. */
		sd->new_places = pmap_create_ex(pmap_count(sd->places));
	} else {
		/* Patch back all nodes if dead node elimination is over and something is to be done. */
		pmap_destroy(sd->places);
		sd->places     = sd->new_places;
		sd->new_places = NULL;
	}
}
/**
 * Hook called when dead node elimination replaces old by nw.
 */
static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw)
{
	survive_dce_t *sd = context;
	survive_dce_list_t *list = pmap_get(sd->places, old);
	(void) irg;

	/* If the node is to be patched back, write the new address to all registered locations. */
	if (list != NULL) {
		survive_dce_list_t *p;

		for (p = list; p; p = p->next)
			*(p->place) = nw;

		pmap_insert(sd->new_places, nw, list);
	}
}
/*
 * Make a new Survive DCE environment.
 */
survive_dce_t *new_survive_dce(void)
{
	survive_dce_t *res = XMALLOC(survive_dce_t);

	obstack_init(&res->obst);
	res->places     = pmap_create();
	res->new_places = NULL;

	res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
	res->dead_node_elim.context                   = res;
	res->dead_node_elim.next                      = NULL;

	res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
	res->dead_node_elim_subst.context                         = res;
	res->dead_node_elim_subst.next                            = NULL;

	register_hook(hook_dead_node_elim, &res->dead_node_elim);
	register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
	return res;
}
/*
 * Free a Survive DCE environment.
 */
void free_survive_dce(survive_dce_t *sd)
{
	obstack_free(&sd->obst, NULL);
	pmap_destroy(sd->places);
	unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
	unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
	xfree(sd);
}
/**
 * Register a node pointer to be patched upon DCE.
 * When DCE occurs, the node pointer specified by @p place will be
 * patched to the new address of the node it is pointing to.
 *
 * @param sd      The Survive DCE environment.
 * @param place   The address of the node pointer.
 */
void survive_dce_register_irn(survive_dce_t *sd, ir_node **place)
{
	if (*place != NULL) {
		ir_node *irn              = *place;
		survive_dce_list_t *curr  = pmap_get(sd->places, irn);
		survive_dce_list_t *nw    = OALLOC(&sd->obst, survive_dce_list_t);

		nw->next  = curr;
		nw->place = place;

		pmap_insert(sd->places, irn, nw);
	}
}
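/* Usage sketch (for exposition): keep a node pointer valid across a dead
 * node elimination run. The variable names are made up for the example.
 *
 *   ir_node *watched = ...;
 *   survive_dce_t *sd = new_survive_dce();
 *   survive_dce_register_irn(sd, &watched);
 *   dead_node_elimination(irg);   // watched now points to the copied node
 *   free_survive_dce(sd);
 */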
/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/
/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling copy_node() and then updates the entity if
 * it's a local one.  env must be a pointer to the frame type of the
 * inlined procedure. The new entities must be in the link field of
 * the entities.
 */
static void copy_node_inline(ir_node *n, void *env)
{
	ir_node *nn;
	ir_type *frame_tp = (ir_type *)env;

	copy_node(n, NULL);
	if (is_Sel(n)) {
		nn = get_new_node(n);
		/* use copied entities from the new frame */
		if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
			set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
		}
	} else if (is_Block(n)) {
		nn = get_new_node(n);
		nn->attr.block.irg.irg = current_ir_graph;
	}
}
/**
 * Copies new predecessors of the old node and moves constants to
 * the Start block.
 */
static void copy_preds_inline(ir_node *n, void *env)
{
	ir_node *nn;

	copy_preds(n, env);
	nn = skip_Id(get_new_node(n));
	if (is_irn_constlike(nn)) {
		/* move Constants into the start block */
		set_nodes_block(nn, get_irg_start_block(current_ir_graph));

		n = identify_remember(current_ir_graph->value_table, nn);
		if (nn != n) {
			DBG_OPT_CSE(nn, n);
			exchange(nn, n);
		}
	}
}
/**
 * Walker: checks if P_value_arg_base is used.
 */
static void find_addr(ir_node *node, void *env)
{
	int *allow_inline = env;

	if (is_Sel(node)) {
		ir_graph *irg = current_ir_graph;
		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
			/* access to frame */
			ir_entity *ent = get_Sel_entity(node);
			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
				/* access to value_type */
				*allow_inline = 0;
			}
		}
	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
		/* From GCC:
		 * Refuse to inline alloca call unless user explicitly forced so as this
		 * may change program's memory overhead drastically when the function
		 * using alloca is called in a loop. In the GCC code present in SPEC2000,
		 * inlining into schedule_block caused it to require 2GB of RAM instead
		 * of 256MB.
		 *
		 * Sadly this is true with our implementation also.
		 * Moreover, we cannot differentiate between alloca() and VLA yet, so this
		 * disables inlining of functions using VLA (which are completely safe).
		 *
		 * Two solutions:
		 * - add a flag to the Alloc node for "real" alloca() calls
		 * - add a new Stack-Restore node at the end of a function using alloca()
		 */
		*allow_inline = 0;
	}
}
/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - call with compound arguments
 * - graphs that take the address of a parameter
 *
 * check these conditions here
 */
static int can_inline(ir_node *call, ir_graph *called_graph)
{
	ir_type *call_type = get_Call_type(call);
	int params, ress, i, res;
	assert(is_Method_type(call_type));

	params = get_method_n_params(call_type);
	ress   = get_method_n_ress(call_type);

	/* check parameters for compound arguments */
	for (i = 0; i < params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type))
			return 0;
	}

	/* check results for compound arguments */
	for (i = 0; i < ress; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type))
			return 0;
	}

	res = 1;
	irg_walk_graph(called_graph, find_addr, NULL, &res);

	return res;
}
enum exc_mode {
	exc_handler,    /**< There is a handler. */
	exc_no_handler  /**< Exception handling not represented. */
};
/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
	ir_node *pre_call;
	ir_node *post_call, *post_bl;
	ir_node *in[pn_Start_max];
	ir_node *end, *end_bl, *block;
	ir_node **res_pred;
	ir_node **cf_pred;
	ir_node **args_in;
	ir_node *ret, *phi;
	int arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
	int n_mem_phi;
	enum exc_mode exc_handling;
	ir_type *called_frame, *curr_frame, *mtp, *ctp;
	ir_entity *ent;
	ir_graph *rem, *irg;
	irg_inline_property prop = get_irg_inline_property(called_graph);
	unsigned long visited;

	if (prop == irg_inline_forbidden)
		return 0;

	ent = get_irg_entity(called_graph);

	mtp = get_entity_type(ent);
	ctp = get_Call_type(call);
	n_params = get_method_n_params(mtp);
	n_res    = get_method_n_ress(mtp);
	if (n_params > get_method_n_params(ctp)) {
		/* this is a bad feature of C: without a prototype, we can
		 * call a function with fewer parameters than needed. Currently
		 * we don't support this, although we could use Unknown then. */
		return 0;
	}
	if (n_res != get_method_n_ress(ctp)) {
		return 0;
	}

	/* Argh, compiling C has some bad consequences:
	 * It is implementation dependent what happens in that case.
	 * We support inlining, if the bitsize of the types matches AND
	 * the same arithmetic is used. */
	for (i = n_params - 1; i >= 0; --i) {
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_type *arg_tp   = get_method_param_type(ctp, i);

		if (param_tp != arg_tp) {
			ir_mode *pmode = get_type_mode(param_tp);
			ir_mode *amode = get_type_mode(arg_tp);

			if (pmode == NULL || amode == NULL)
				return 0;
			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
				return 0;
			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
				return 0;
			/* otherwise we can simply "reinterpret" the bits */
		}
	}
	for (i = n_res - 1; i >= 0; --i) {
		ir_type *decl_res_tp = get_method_res_type(mtp, i);
		ir_type *used_res_tp = get_method_res_type(ctp, i);

		if (decl_res_tp != used_res_tp) {
			ir_mode *decl_mode = get_type_mode(decl_res_tp);
			ir_mode *used_mode = get_type_mode(used_res_tp);
			if (decl_mode == NULL || used_mode == NULL)
				return 0;
			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
				return 0;
			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
				return 0;
			/* otherwise we can "reinterpret" the bits */
		}
	}
	irg = get_irn_irg(call);

	/*
	 * We cannot inline a recursive call. The graph must be copied before
	 * the call to inline_method() using create_irg_copy().
	 */
	if (called_graph == irg)
		return 0;

	/*
	 * currently, we cannot inline two cases:
	 * - call with compound arguments
	 * - graphs that take the address of a parameter
	 */
	if (! can_inline(call, called_graph))
		return 0;

	rem = current_ir_graph;
	current_ir_graph = irg;

	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

	/* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
	rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	assert(get_irg_pinned(irg) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	set_irg_outs_inconsistent(irg);
	set_irg_extblk_inconsistent(irg);
	set_irg_doms_inconsistent(irg);
	set_irg_loopinfo_inconsistent(irg);
	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
	set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);

	/* -- Check preconditions -- */
	assert(is_Call(call));

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);
	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handling:
	   0 There is a handler.
	   2 Exception handling not represented in Firm. -- */
	{
		ir_node *proj;
		ir_node *Xproj = NULL;

		for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
			long proj_nr = get_Proj_proj(proj);
			if (proj_nr == pn_Call_X_except) Xproj = proj;
		}
		exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
	}
	/* create the argument tuple */
	NEW_ARR_A(ir_node *, args_in, n_params);

	block = get_nodes_block(call);
	for (i = n_params - 1; i >= 0; --i) {
		ir_node *arg      = get_Call_param(call, i);
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_mode *mode     = get_type_mode(param_tp);

		if (mode != get_irn_mode(arg)) {
			arg = new_r_Conv(block, arg, mode);
		}
		args_in[i] = arg;
	}

	/* -- Pre_call is a Tuple that collects the start values (memory, frame,
	   arguments) of the procedure and later replaces the Start node of the
	   called graph. Post_call is the old Call node and collects the results
	   of the called graph. Both will end up being a tuple. -- */
	post_bl = get_nodes_block(call);
	set_irg_current_block(irg, post_bl);
	/* XxMxPxPxPxT of Start + parameter of Call */
	in[pn_Start_X_initial_exec] = new_Jmp();
	in[pn_Start_M]              = get_Call_mem(call);
	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
	in[pn_Start_P_tls]          = get_irg_tls(irg);
	in[pn_Start_T_args]         = new_Tuple(n_params, args_in);
	pre_call = new_Tuple(pn_Start_max, in);
	post_call = call;

	/* -- Split the Call block:
	   the new block gets the ins of the old block, pre_call and all its
	   predecessors and all Phi nodes. -- */
	part_block(pre_call);
	/* -- Prepare state for dead node elimination -- */
	/* Visited flags in calling irg must be >= flag in called irg.
	   Else walker and arity computation will not work. */
	if (get_irg_visited(irg) <= get_irg_visited(called_graph))
		set_irg_visited(irg, get_irg_visited(called_graph) + 1);
	if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
		set_irg_block_visited(irg, get_irg_block_visited(called_graph));
	visited = get_irg_visited(irg);

	/* Set pre_call as new Start node in link field of the start node of
	   the calling graph and pre_call's block as new block for the start block
	   of the calling graph.
	   Further mark these nodes so that they are not visited by the
	   copying. */
	set_irn_link(get_irg_start(called_graph), pre_call);
	set_irn_visited(get_irg_start(called_graph), visited);
	set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
	set_irn_visited(get_irg_start_block(called_graph), visited);

	set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
	set_irn_visited(get_irg_bad(called_graph), visited);

	set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
	set_irn_visited(get_irg_no_mem(called_graph), visited);

	/* Initialize for compaction of in arrays */
	inc_irg_block_visited(irg);
	/* -- Replicate local entities of the called_graph -- */
	/* copy the entities. */
	irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
	called_frame = get_irg_frame_type(called_graph);
	curr_frame   = get_irg_frame_type(irg);
	for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
		ir_entity *new_ent, *old_ent;
		old_ent = get_class_member(called_frame, i);
		new_ent = copy_entity_own(old_ent, curr_frame);
		set_entity_link(old_ent, new_ent);
	}

	/* visited is now greater than that of the called graph. With this trick,
	   visited will remain unchanged, so that an outer walker (e.g., one
	   searching for call nodes to inline) will not visit the inlined nodes. */
	set_irg_visited(irg, get_irg_visited(irg)-1);

	/* -- Performing dead node elimination inlines the graph -- */
	/* Copies the nodes to the obstack of current_ir_graph. Updates links to new
	   entities. */
	irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
	         get_irg_frame_type(called_graph));

	irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);

	/* Repair called_graph */
	set_irg_visited(called_graph, get_irg_visited(irg));
	set_irg_block_visited(called_graph, get_irg_block_visited(irg));
	set_Block_block_visited(get_irg_start_block(called_graph), 0);
	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   -1: Block of Tuple.
	   0: Phi of all Memories of Return statements.
	   1: Jmp from new Block that merges the control flow from all exception
	      predecessors of the old end block.
	   2: Tuple of all arguments.
	   3: Phi of Exception memories.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories. */
	/* -- Precompute some values -- */
	end_bl = get_new_node(get_irg_end_block(called_graph));
	end    = get_new_node(get_irg_end(called_graph));
	arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
	n_res  = get_method_n_ress(get_Call_type(call));

	res_pred = XMALLOCN(ir_node*, n_res);
	cf_pred  = XMALLOCN(ir_node*, arity);

	set_irg_current_block(irg, post_bl); /* just to make sure */

	/* -- archive keepalives -- */
	irn_arity = get_irn_arity(end);
	for (i = 0; i < irn_arity; i++) {
		ir_node *ka = get_End_keepalive(end, i);
		if (! is_Bad(ka))
			add_End_keepalive(get_irg_end(irg), ka);
	}

	/* The new end node will die. We need not free as the in array is on the obstack:
	   copy_node() only generated 'D' arrays. */

	/* -- Replace Return nodes by Jump nodes. -- */
	n_ret = 0;
	for (i = 0; i < arity; i++) {
		ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_ret] = new_r_Jmp(get_nodes_block(ret));
			n_ret++;
		}
	}
	set_irn_in(post_bl, n_ret, cf_pred);
	/* -- Build a Tuple for all results of the method.
	   Add Phi node if there was more than one Return. -- */
	turn_into_tuple(post_call, pn_Call_max);
	/* First the Memory-Phi */
	n_mem_phi = 0;
	for (i = 0; i < arity; i++) {
		ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_mem_phi++] = get_Return_mem(ret);
		}
		/* memory output for some exceptions is directly connected to End */
		if (is_Call(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 3);
		} else if (is_fragile_op(ret)) {
			/* We rely that all cfops have the memory output at the same position. */
			cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 0);
		} else if (is_Raise(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 1);
		}
	}
	phi = new_Phi(n_mem_phi, cf_pred, mode_M);
	set_Tuple_pred(call, pn_Call_M, phi);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(phi) == post_bl) {
		set_irn_link(phi, get_irn_link(post_bl));
		set_irn_link(post_bl, phi);
	}
	/* Now the real results */
	if (n_res > 0) {
		for (j = 0; j < n_res; j++) {
			ir_type *res_type = get_method_res_type(ctp, j);
			ir_mode *res_mode = get_type_mode(res_type);
			n_ret = 0;
			for (i = 0; i < arity; i++) {
				ret = get_Block_cfgpred(end_bl, i);
				if (is_Return(ret)) {
					ir_node *res = get_Return_res(ret, j);
					if (get_irn_mode(res) != res_mode) {
						ir_node *block = get_nodes_block(res);
						res = new_r_Conv(block, res, res_mode);
					}
					cf_pred[n_ret] = res;
					n_ret++;
				}
			}
			if (n_ret > 0)
				phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
			else
				phi = new_Bad();
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_Phi_next(phi, get_Block_phis(post_bl));
				set_Block_phis(post_bl, phi);
			}
		}
		set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
	} else {
		set_Tuple_pred(call, pn_Call_T_result, new_Bad());
	}
	/* handle the regular call */
	set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());

	/* For now, we cannot inline calls with value_base */
	set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
	/* Finally the exception control flow.
	   We have two possible situations:
	   First if the Call branches to an exception handler:
	   We need to add a Phi node to
	   collect the memory containing the exception objects. Further we need
	   to add another block to get a correct representation of this Phi. To
	   this block we add a Jmp that resolves into the X output of the Call
	   when the Call is turned into a tuple.
	   Second: There is no exception edge. Just add all inlined exception
	   branches to the End node.
	 */
	if (exc_handling == exc_handler) {
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *irn;

			ret = get_Block_cfgpred(end_bl, i);
			irn = skip_Proj(ret);
			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc > 0) {
			if (n_exc == 1) {
				/* simple fix */
				set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
			} else {
				ir_node *block = new_Block(n_exc, cf_pred);
				set_cur_block(block);
				set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
			}
		} else {
			set_Tuple_pred(call, pn_Call_X_except, new_Bad());
		}
	} else {
		ir_node *main_end_bl;
		int main_end_bl_arity;
		ir_node **end_preds;

		/* assert(exc_handling == 1 || no exceptions. ) */
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);

			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				n_exc++;
			}
		}
		main_end_bl       = get_irg_end_block(irg);
		main_end_bl_arity = get_irn_arity(main_end_bl);
		end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

		for (i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		set_Tuple_pred(call, pn_Call_X_except, new_Bad());
		xfree(end_preds);
	}
	xfree(res_pred);
	xfree(cf_pred);

	/* --  Turn CSE back on. -- */
	set_optimize(rem_opt);
	current_ir_graph = rem;

	return 1;
}
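/* Usage sketch (for exposition): inline_method() expects up-to-date Phi and
 * Proj lists, as the drivers below establish:
 *
 *   ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 *   collect_phiprojs(irg);
 *   if (inline_method(call, callee)) { ... }
 *   ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 */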
/********************************************************************/
/*  Apply inlining to small methods.                                */
/********************************************************************/

static struct obstack temp_obst;
/** Represents a possible inlinable call in a graph. */
typedef struct _call_entry {
	ir_node    *call;       /**< The Call node. */
	ir_graph   *callee;     /**< The callee IR-graph. */
	list_head  list;        /**< List head for linking the next one. */
	int        loop_depth;  /**< The loop depth of this call. */
	int        benefice;    /**< The calculated benefice of this call. */
	unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
	unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;
/**
 * environment for inlining small irgs
 */
typedef struct _inline_env_t {
	struct obstack obst;  /**< An obstack where call_entries are allocated on. */
	list_head      calls; /**< The call entry list. */
} inline_env_t;
/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
	ir_node *addr;

	addr = get_Call_ptr(call);
	if (is_Global(addr)) {
		ir_entity *ent = get_Global_entity(addr);
		/* we don't know which function gets finally bound to a weak symbol */
		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
			return NULL;

		return get_entity_irg(ent);
	}

	return NULL;
}
/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
	if (is_Call(call)) {
		ir_graph *called_irg = get_call_called_irg(call);

		if (called_irg != NULL) {
			/* The Call node calls a locally defined method.  Remember to inline. */
			inline_env_t *ienv  = env;
			call_entry   *entry = OALLOC(&ienv->obst, call_entry);
			entry->call       = call;
			entry->callee     = called_irg;
			entry->loop_depth = 0;
			entry->benefice   = 0;
			entry->local_adr  = 0;
			entry->all_const  = 0;

			list_add_tail(&entry->list, &ienv->calls);
		}
	}
}
/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
	ir_graph *rem = current_ir_graph;
	inline_env_t env;
	call_entry *entry;

	current_ir_graph = irg;
	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	free_callee_info(irg);

	/* Find Call nodes to inline.
	   (We can not inline during a walk of the graph, as inlining the same
	   method several times changes the visited flag of the walked graph:
	   after the first inlining visited of the callee equals visited of
	   the caller.  With the next inlining both are increased.) */
	obstack_init(&env.obst);
	INIT_LIST_HEAD(&env.calls);
	irg_walk_graph(irg, NULL, collect_calls, &env);

	if (! list_empty(&env.calls)) {
		/* There are calls to inline */
		ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		collect_phiprojs(irg);

		list_for_each_entry(call_entry, entry, &env.calls, list) {
			ir_graph            *callee = entry->callee;
			irg_inline_property prop    = get_irg_inline_property(callee);

			if (prop == irg_inline_forbidden) {
				continue;
			}

			if (prop >= irg_inline_forced ||
			    _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
				inline_method(entry->call, callee);
			}
		}
		ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}
	obstack_free(&env.obst, NULL);
	current_ir_graph = rem;
}
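/* Example (for exposition): a driver that applies inline_small_irgs() to
 * every graph of the program; the threshold 50 is an arbitrary assumption,
 * not a tuned value. */
#if 0
static void inline_all_small_irgs(void)
{
	int i;
	for (i = get_irp_n_irgs() - 1; i >= 0; --i)
		inline_small_irgs(get_irp_irg(i), 50);
}
#endif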
struct inline_small_irgs_pass_t {
	ir_graph_pass_t pass;
	int             size;
};

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
	struct inline_small_irgs_pass_t *pass = context;

	inline_small_irgs(irg, pass->size);
	return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
	struct inline_small_irgs_pass_t *pass =
		XMALLOCZ(struct inline_small_irgs_pass_t);

	pass->size = size;
	return def_graph_pass_constructor(
		&pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}
/**
 * Environment for inlining irgs.
 */
typedef struct _inline_irg_env {
	list_head calls;              /**< List of all call nodes in this graph. */
	unsigned  *local_weights;     /**< Once allocated, the beneficial weight for transmitting local addresses. */
	unsigned  n_nodes;            /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
	unsigned  n_blocks;           /**< Number of Blocks in graph without Start and End block. */
	unsigned  n_nodes_orig;       /**< for statistics */
	unsigned  n_call_nodes;       /**< Number of Call nodes in the graph. */
	unsigned  n_call_nodes_orig;  /**< for statistics */
	unsigned  n_callers;          /**< Number of known graphs that call this graph. */
	unsigned  n_callers_orig;     /**< for statistics */
	unsigned  got_inline:1;       /**< Set, if at least one call inside this graph was inlined. */
	unsigned  recursive:1;        /**< Set, if this function is self recursive. */
} inline_irg_env;
/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
	inline_irg_env *env = OALLOC(&temp_obst, inline_irg_env);
	INIT_LIST_HEAD(&env->calls);
	env->local_weights     = NULL;
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_blocks          = -2; /* do not count Start and End block */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	env->recursive         = 0;
	return env;
}
typedef struct walker_env {
	inline_irg_env *x;    /**< the inline environment */
	char ignore_runtime;  /**< the ignore runtime flag */
	char ignore_callers;  /**< if set, do not update callers data */
} wenv_t;
/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
	wenv_t         *env = ctx;
	inline_irg_env *x   = env->x;
	ir_opcode      code = get_irn_opcode(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
		if (code != iro_Block) {
			++x->n_nodes;
			++x->n_nodes_orig;
		} else {
			++x->n_blocks;
		}
	}

	if (code != iro_Call) return;

	/* check, if it's a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_Global(symc)) {
			ir_entity *ent = get_Global_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee != NULL) {
		if (! env->ignore_callers) {
			inline_irg_env *callee_env = get_irg_link(callee);
			/* count all static callers */
			++callee_env->n_callers;
			++callee_env->n_callers_orig;
		}
		if (callee == current_ir_graph)
			x->recursive = 1;

		/* link it in the list of possible inlinable entries */
		entry = OALLOC(&temp_obst, call_entry);
		entry->call       = call;
		entry->callee     = callee;
		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
		entry->benefice   = 0;
		entry->local_adr  = 0;
		entry->all_const  = 0;

		list_add_tail(&entry->list, &x->calls);
	}
}
/**
 * Returns TRUE if the number of call nodes is 0 in the irg's environment,
 * hence this irg is a leaf.
 */
inline static int is_leave(ir_graph *irg)
{
	inline_irg_env *env = get_irg_link(irg);
	return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
	inline_irg_env *env = get_irg_link(callee);
	return env->n_nodes < size;
}
/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
	call_entry *nentry = OALLOC(&temp_obst, call_entry);
	nentry->call       = new_call;
	nentry->callee     = entry->callee;
	nentry->benefice   = entry->benefice;
	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
	nentry->local_adr  = entry->local_adr;
	nentry->all_const  = entry->all_const;

	return nentry;
}
/**
 * Append all call nodes of the source environment to the calls of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
	call_entry *entry, *nentry;

	/* Note that the src list points to Call nodes in the inlined graph, but
	   we need Call nodes in our graph. Luckily the inliner leaves this information
	   in the link field. */
	list_for_each_entry(call_entry, entry, &src->calls, list) {
		nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
		list_add_tail(&nentry->list, &dst->calls);
	}
	dst->n_call_nodes += src->n_call_nodes;
	dst->n_nodes      += src->n_nodes;
}
/*
 * Inlines small leaf methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
	inline_irg_env   *env;
	ir_graph         *irg;
	int              i, n_irgs;
	ir_graph         *rem;
	int              did_inline;
	wenv_t           wenv;
	call_entry       *entry, *next;
	const call_entry *centry;
	pmap             *copied_graphs;
	pmap_entry       *pm_entry;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = ignore_runtime;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = get_irp_irg(i);

		assert(get_irg_phase_state(irg) != phase_building);
		free_callee_info(irg);

		assure_cf_loop(irg);
		wenv.x = get_irg_link(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}
	/* -- and now inline. -- */

	/* Inline leaves recursively -- we might construct new leaves. */
	do {
		did_inline = 0;

		for (i = 0; i < n_irgs; ++i) {
			ir_node *call;
			int phiproj_computed = 0;

			current_ir_graph = get_irp_irg(i);
			env              = get_irg_link(current_ir_graph);

			ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
			list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
				ir_graph            *callee;
				irg_inline_property prop;

				if (env->n_nodes > maxsize)
					break;

				call   = entry->call;
				callee = entry->callee;

				prop = get_irg_inline_property(callee);
				if (prop == irg_inline_forbidden) {
					continue;
				}

				if (is_leave(callee) && (
				    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
					if (!phiproj_computed) {
						phiproj_computed = 1;
						collect_phiprojs(current_ir_graph);
					}
					did_inline = inline_method(call, callee);

					if (did_inline) {
						inline_irg_env *callee_env = get_irg_link(callee);

						/* call was inlined, Phi/Projs for current graph must be recomputed */
						phiproj_computed = 0;

						/* Do some statistics */
						env->got_inline = 1;
						--env->n_call_nodes;
						env->n_nodes += callee_env->n_nodes;
						--callee_env->n_callers;

						/* remove this call from the list */
						list_del(&entry->list);
					}
				}
			}
			ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		}
	} while (did_inline);
	/* inline other small functions. */
	for (i = 0; i < n_irgs; ++i) {
		ir_node *call;
		int phiproj_computed = 0;

		current_ir_graph = get_irp_irg(i);
		env              = get_irg_link(current_ir_graph);

		ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

		/* note that the list of possible calls is updated during the process */
		list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
			irg_inline_property prop;
			ir_graph            *callee;
			pmap_entry          *e;

			call   = entry->call;
			callee = entry->callee;

			prop = get_irg_inline_property(callee);
			if (prop == irg_inline_forbidden) {
				continue;
			}

			e = pmap_find(copied_graphs, callee);
			if (e != NULL) {
				/*
				 * Remap callee if we have a copy.
				 * FIXME: Should we do this only for recursive Calls ?
				 */
				callee = e->value;
			}

			if (prop >= irg_inline_forced ||
			    (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
				if (current_ir_graph == callee) {
					/*
					 * Recursive call: we cannot directly inline because we cannot walk
					 * the graph and change it. So we have to make a copy of the graph
					 * first.
					 */
					inline_irg_env *callee_env;
					ir_graph       *copy;

					ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/*
					 * No copy yet, create one.
					 * Note that recursive methods are never leaves, so it is sufficient
					 * to test this condition here.
					 */
					copy = create_irg_copy(callee);

					/* create_irg_copy() destroys the Proj links, recompute them */
					phiproj_computed = 0;

					ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/* allocate new environment */
					callee_env = alloc_inline_irg_env();
					set_irg_link(copy, callee_env);

					assure_cf_loop(copy);
					wenv.x              = callee_env;
					wenv.ignore_callers = 1;
					irg_walk_graph(copy, NULL, collect_calls2, &wenv);

					/*
					 * Enter the entity of the original graph. This is needed
					 * for inline_method(). However, note that ent->irg still points
					 * to callee, NOT to copy.
					 */
					set_irg_entity(copy, get_irg_entity(callee));

					pmap_insert(copied_graphs, callee, copy);
					callee = copy;

					/* we have only one caller: the original graph */
					callee_env->n_callers      = 1;
					callee_env->n_callers_orig = 1;
				}
				if (! phiproj_computed) {
					phiproj_computed = 1;
					collect_phiprojs(current_ir_graph);
				}
				did_inline = inline_method(call, callee);
				if (did_inline) {
					inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

					/* call was inlined, Phi/Projs for current graph must be recomputed */
					phiproj_computed = 0;

					/* callee was inlined. Append its call list. */
					env->got_inline = 1;
					--env->n_call_nodes;
					append_call_list(env, callee_env, entry->loop_depth);
					--callee_env->n_callers;

					/* after we have inlined callee, all called methods inside callee
					   are now called once more */
					list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
						inline_irg_env *penv = get_irg_link(centry->callee);
						++penv->n_callers;
					}

					/* remove this call from the list */
					list_del(&entry->list);
				}
			}
		}
		ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}
	for (i = 0; i < n_irgs; ++i) {
		irg = get_irp_irg(i);
		env = get_irg_link(irg);

		if (env->got_inline) {
			optimize_graph_df(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			    env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			    env->n_callers_orig, env->n_callers,
			    get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}
struct inline_leave_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	unsigned       leavesize;
	unsigned       size;
	int            ignore_runtime;
};

/**
 * Wrapper to run inline_leave_functions() as a ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
	struct inline_leave_functions_pass_t *pass = context;
	(void) irp;

	inline_leave_functions(
		pass->maxsize, pass->leavesize,
		pass->size, pass->ignore_runtime);
	return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
	const char *name, unsigned maxsize, unsigned leavesize,
	unsigned size, int ignore_runtime)
{
	struct inline_leave_functions_pass_t *pass =
		XMALLOCZ(struct inline_leave_functions_pass_t);

	pass->maxsize        = maxsize;
	pass->leavesize      = leavesize;
	pass->size           = size;
	pass->ignore_runtime = ignore_runtime;

	return def_prog_pass_constructor(
		&pass->pass,
		name ? name : "inline_leave_functions",
		inline_leave_functions_wrapper);
}
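/* Example (for exposition): a typical direct invocation; the concrete
 * thresholds are assumptions for illustration, not recommended values.
 *
 *   inline_leave_functions(750, 16, 10, 0);
 *
 * i.e. stop growing a graph beyond ~750 nodes, treat leaves up to 16 nodes
 * and other functions up to 10 nodes as inline candidates, and do not skip
 * runtime calls. */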
/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
	int      i, j, k;
	unsigned v, weight = 0;

	for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
		ir_node *succ = get_irn_out(arg, i);

		switch (get_irn_opcode(succ)) {
		case iro_Load:
		case iro_Store:
			/* Loads and Store can be removed */
			weight += 3;
			break;
		case iro_Sel:
			/* check if all args are constant */
			for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
				ir_node *idx = get_Sel_index(succ, j);
				if (! is_Const(idx))
					return 0;
			}
			/* Check users on this Sel. Note: if a 0 is returned here, there was
			   some unsupported node. */
			v = calc_method_local_weight(succ);
			if (v == 0)
				return 0;
			/* we can kill one Sel with constant indexes, this is cheap */
			weight += v + 1;
			break;
		case iro_Id:
			/* when looking backward we might find Id nodes */
			weight += calc_method_local_weight(succ);
			break;
		case iro_Tuple:
			/* unoptimized tuple */
			for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
				ir_node *pred = get_Tuple_pred(succ, j);
				if (pred == arg) {
					/* look for Proj(j) */
					for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
						ir_node *succ_succ = get_irn_out(succ, k);
						if (is_Proj(succ_succ)) {
							if (get_Proj_proj(succ_succ) == j) {
								/* found */
								weight += calc_method_local_weight(succ_succ);
							}
						} else {
							/* this should NOT happen */
							return 0;
						}
					}
				}
			}
			break;
		default:
			/* any other node: unsupported yet or bad. */
			return 0;
		}
	}
	return weight;
}
/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
{
	ir_entity *ent = get_irg_entity(irg);
	ir_type   *mtp;
	int       nparams, i, proj_nr;
	ir_node   *irg_args, *arg;

	mtp     = get_entity_type(ent);
	nparams = get_method_n_params(mtp);

	/* allocate a new array. currently used as 'analysed' flag */
	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);

	/* If the method has no parameters we have nothing to do. */
	if (nparams <= 0)
		return;

	assure_irg_outs(irg);
	irg_args = get_irg_args(irg);
	for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
		arg     = get_irn_out(irg_args, i);
		proj_nr = get_Proj_proj(arg);
		env->local_weights[proj_nr] = calc_method_local_weight(arg);
	}
}
/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
static unsigned get_method_local_adress_weight(ir_graph *callee, int pos)
{
	inline_irg_env *env = get_irg_link(callee);

	if (env->local_weights != NULL) {
		if (pos < ARR_LEN(env->local_weights))
			return env->local_weights[pos];
		return 0;
	}

	analyze_irg_local_weights(env, callee);

	if (pos < ARR_LEN(env->local_weights))
		return env->local_weights[pos];
	return 0;
}
2008 * Calculate a benefice value for inlining the given call.
2010 * @param call the call node we have to inspect
2011 * @param callee the called graph
2013 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
2015 ir_node *call = entry->call;
2016 ir_entity *ent = get_irg_entity(callee);
2020 int i, n_params, all_const;
2022 irg_inline_property prop;
2024 inline_irg_env *callee_env;
2026 prop = get_irg_inline_property(callee);
2027 if (prop == irg_inline_forbidden) {
2028 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
2030 return entry->benefice = INT_MIN;
2033 if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
2034 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
2036 return entry->benefice = INT_MIN;
2039 /* costs for every passed parameter */
2040 n_params = get_Call_n_params(call);
2041 mtp = get_entity_type(ent);
2042 cc = get_method_calling_convention(mtp);
2043 if (cc & cc_reg_param) {
2044 /* register parameter, smaller costs for register parameters */
2045 int max_regs = cc & ~cc_bits;
2047 if (max_regs < n_params)
2048 weight += max_regs * 2 + (n_params - max_regs) * 5;
2050 weight += n_params * 2;
2052 /* parameters are passed an stack */
2053 weight += 5 * n_params;
2056 /* constant parameters improve the benefice */
2057 frame_ptr = get_irg_frame(current_ir_graph);
2059 for (i = 0; i < n_params; ++i) {
2060 ir_node *param = get_Call_param(call, i);
2062 if (is_Const(param)) {
2063 weight += get_method_param_weight(ent, i);
2066 if (is_SymConst(param))
2067 weight += get_method_param_weight(ent, i);
2068 else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
2070 * The address of a local variable is transmitted. After
2071 * inlining, scalar_replacement might be able to remove the
2072 * local variable, so honor this.
2074 v = get_method_local_adress_weight(callee, i);
2077 entry->local_adr = 1;
2081 entry->all_const = all_const;
2083 callee_env = get_irg_link(callee);
2084 if (callee_env->n_callers == 1 &&
2085 callee != current_ir_graph &&
2086 !entity_is_externally_visible(ent)) {
2090 /* give a bonus for functions with one block */
2091 if (callee_env->n_blocks == 1)
2092 weight = weight * 3 / 2;
2094 /* and one for small non-recursive functions: we want them to be inlined in almost every case */
2095 if (callee_env->n_nodes < 30 && !callee_env->recursive)
2098 /* and finally for leaves: they do not increase the register pressure
2099 because of callee-save registers */
2100 if (callee_env->n_call_nodes == 0)
2103 /* it's important to inline inner loops first */
2104 if (entry->loop_depth > 30)
2105 weight += 30 * 1024;
2107 weight += entry->loop_depth * 1024;
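	/*
	 * Worked example (illustrative): loop_depth == 3 adds 3 * 1024 == 3072
	 * to the weight; any depth above 30 is capped at 30 * 1024 == 30720.
	 */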
2110 * All arguments being constant is probably a good sign; give an extra bonus
2115 return entry->benefice = weight;
2118 static ir_graph **irgs;
2119 static int last_irg;
2122 * Callgraph walker, collect all visited graphs.
2124 static void callgraph_walker(ir_graph *irg, void *data)
2127 irgs[last_irg++] = irg;
2131 * Creates an inline order for all graphs.
2133 * @return the list of graphs.
2135 static ir_graph **create_irg_list(void)
2137 ir_entity **free_methods;
2139 int n_irgs = get_irp_n_irgs();
2141 cgana(&arr_len, &free_methods);
2142 xfree(free_methods);
2144 compute_callgraph();
2147 irgs = XMALLOCNZ(ir_graph*, n_irgs);
2149 callgraph_walk(NULL, callgraph_walker, NULL);
2150 assert(n_irgs == last_irg);
2156 * Push a call onto the priority queue if its benefice is big enough.
2158 * @param pqueue the priority queue of calls
2159 * @param call the call entry
2160 * @param inline_threshold
2161 * the threshold value
2163 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2164 int inline_threshold)
2166 ir_graph *callee = call->callee;
2167 irg_inline_property prop = get_irg_inline_property(callee);
2168 int benefice = calc_inline_benefice(call, callee);
2170 DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2171 get_irn_irg(call->call), call->call, callee, benefice));
2173 if (prop < irg_inline_forced && benefice < inline_threshold) {
2177 pqueue_put(pqueue, call, benefice);
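/*
 * Example (illustrative): with inline_threshold == 0, only calls whose
 * benefice is non-negative are queued; calls to irg_inline_forced graphs
 * skip the threshold test entirely.
 */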
2181 * Try to inline calls into a graph.
2183 * @param irg the graph into which we inline
2184 * @param maxsize do NOT inline if the size of irg gets
2185 * bigger than this amount
2186 * @param inline_threshold
2187 * threshold value for inline decision
2188 * @param copied_graphs
2189 * map containing copies of recursive graphs
2191 static void inline_into(ir_graph *irg, unsigned maxsize,
2192 int inline_threshold, pmap *copied_graphs)
2194 int phiproj_computed = 0;
2195 inline_irg_env *env = get_irg_link(irg);
2196 call_entry *curr_call;
2200 if (env->n_call_nodes == 0)
2203 if (env->n_nodes > maxsize) {
2204 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2208 current_ir_graph = irg;
2209 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2211 /* put all calls into the priority queue */
2212 pqueue = new_pqueue();
2214 list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2215 assert(is_Call(curr_call->call));
2216 maybe_push_call(pqueue, curr_call, inline_threshold);
2219 /* note that the list of possible calls is updated during the process */
2220 while (!pqueue_empty(pqueue)) {
2222 call_entry *curr_call = pqueue_pop_front(pqueue);
2223 ir_graph *callee = curr_call->callee;
2224 ir_node *call_node = curr_call->call;
2225 inline_irg_env *callee_env = get_irg_link(callee);
2226 irg_inline_property prop = get_irg_inline_property(callee);
2228 const call_entry *centry;
2231 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2232 DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2233 env->n_nodes, callee, callee_env->n_nodes));
2237 e = pmap_find(copied_graphs, callee);
2239 int benefice = curr_call->benefice;
2241 * Reduce the weight for recursive functions IFF not all arguments are const;
2242 * inlining recursive functions is rarely good.
2244 if (!curr_call->all_const)
2246 if (benefice < inline_threshold)
2250 * Remap callee if we have a copy.
2253 callee_env = get_irg_link(callee);
2256 if (current_ir_graph == callee) {
2258 * Recursive call: we cannot directly inline because we cannot
2259 * walk the graph and change it. So we have to make a copy of
2262 int benefice = curr_call->benefice;
2266 * Reduce the weight for recursive functions IFF not all arguments are const;
2267 * inlining recursive functions is rarely good.
2269 if (!curr_call->all_const)
2271 if (benefice < inline_threshold)
2274 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2277 * No copy yet, create one.
2278 * Note that recursive methods are never leaves, so it is
2279 * sufficient to test this condition here.
2281 copy = create_irg_copy(callee);
2283 /* create_irg_copy() destroys the Proj links, recompute them */
2284 phiproj_computed = 0;
2286 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2288 /* allocate a new environment */
2289 callee_env = alloc_inline_irg_env();
2290 set_irg_link(copy, callee_env);
2292 assure_cf_loop(copy);
2293 wenv.x = callee_env;
2294 wenv.ignore_callers = 1;
2295 irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2298 * Enter the entity of the original graph. This is needed
2299 * for inline_method(). However, note that ent->irg still points
2300 * to callee, NOT to copy.
2302 set_irg_entity(copy, get_irg_entity(callee));
2304 pmap_insert(copied_graphs, callee, copy);
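	/*
	 * Illustrative case (assumed C input): for
	 *
	 *   int fac(int n) { return n <= 1 ? 1 : n * fac(n - 1); }
	 *
	 * the self-call inside fac() cannot be inlined while fac's own graph
	 * is being rewritten, so the body is taken from the private copy
	 * registered above instead.
	 */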
2307 /* we have only one caller: the original graph */
2308 callee_env->n_callers = 1;
2309 callee_env->n_callers_orig = 1;
2311 if (! phiproj_computed) {
2312 phiproj_computed = 1;
2313 collect_phiprojs(current_ir_graph);
2315 did_inline = inline_method(call_node, callee);
2319 /* call was inlined, Phi/Projs for current graph must be recomputed */
2320 phiproj_computed = 0;
2322 /* remove it from the caller list */
2323 list_del(&curr_call->list);
2325 /* callee was inlined. Append its call list. */
2326 env->got_inline = 1;
2327 --env->n_call_nodes;
2329 /* we just generate a bunch of new calls */
2330 loop_depth = curr_call->loop_depth;
2331 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2332 inline_irg_env *penv = get_irg_link(centry->callee);
2334 call_entry *new_entry;
2336 /* after we have inlined callee, all called methods inside
2337 * callee are now called once more */
2340 /* Note that the src list points to Call nodes in the inlined graph,
2341 * but we need Call nodes in our graph. Luckily the inliner leaves
2342 * this information in the link field. */
2343 new_call = get_irn_link(centry->call);
2344 assert(is_Call(new_call));
2346 new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2347 list_add_tail(&new_entry->list, &env->calls);
2348 maybe_push_call(pqueue, new_entry, inline_threshold);
2351 env->n_call_nodes += callee_env->n_call_nodes;
2352 env->n_nodes += callee_env->n_nodes;
2353 --callee_env->n_callers;
2355 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2360 * Heuristic inliner. Calculates a benefice value for every call and inlines
2361 * those calls with a value higher than the threshold.
2363 void inline_functions(unsigned maxsize, int inline_threshold,
2364 opt_ptr after_inline_opt)
2366 inline_irg_env *env;
2370 pmap *copied_graphs;
2371 pmap_entry *pm_entry;
2374 rem = current_ir_graph;
2375 obstack_init(&temp_obst);
2377 irgs = create_irg_list();
2379 /* a map for the copied graphs, used to inline recursive calls */
2380 copied_graphs = pmap_create();
2382 /* extend all irgs by a temporary data structure for inlining. */
2383 n_irgs = get_irp_n_irgs();
2384 for (i = 0; i < n_irgs; ++i)
2385 set_irg_link(irgs[i], alloc_inline_irg_env());
2387 /* Pre-compute information in temporary data structure. */
2388 wenv.ignore_runtime = 0;
2389 wenv.ignore_callers = 0;
2390 for (i = 0; i < n_irgs; ++i) {
2391 ir_graph *irg = irgs[i];
2393 free_callee_info(irg);
2395 wenv.x = get_irg_link(irg);
2396 assure_cf_loop(irg);
2397 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2400 /* -- and now inline. -- */
2401 for (i = 0; i < n_irgs; ++i) {
2402 ir_graph *irg = irgs[i];
2404 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2407 for (i = 0; i < n_irgs; ++i) {
2408 ir_graph *irg = irgs[i];
2410 env = get_irg_link(irg);
2411 if (env->got_inline && after_inline_opt != NULL) {
2412 /* this irg got calls inlined: optimize it */
2413 after_inline_opt(irg);
2415 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2416 DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2417 env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2418 env->n_callers_orig, env->n_callers,
2419 get_entity_name(get_irg_entity(irg))));
2423 /* kill the copied graphs: we don't need them anymore */
2424 foreach_pmap(copied_graphs, pm_entry) {
2425 ir_graph *copy = pm_entry->value;
2427 /* reset the entity, otherwise it will be deleted in the next step ... */
2428 set_irg_entity(copy, NULL);
2429 free_ir_graph(copy);
2431 pmap_destroy(copied_graphs);
2435 obstack_free(&temp_obst, NULL);
2436 current_ir_graph = rem;
2439 struct inline_functions_pass_t {
2440 ir_prog_pass_t pass;
2442 int inline_threshold;
2443 opt_ptr after_inline_opt;
2447 * Wrapper to run inline_functions() as an ir_prog pass.
2449 static int inline_functions_wrapper(ir_prog *irp, void *context)
2451 struct inline_functions_pass_t *pass = context;
2454 inline_functions(pass->maxsize, pass->inline_threshold,
2455 pass->after_inline_opt);
2459 /* create an ir_prog pass for inline_functions */
2460 ir_prog_pass_t *inline_functions_pass(
2461 const char *name, unsigned maxsize, int inline_threshold,
2462 opt_ptr after_inline_opt)
2464 struct inline_functions_pass_t *pass =
2465 XMALLOCZ(struct inline_functions_pass_t);
2467 pass->maxsize = maxsize;
2468 pass->inline_threshold = inline_threshold;
2469 pass->after_inline_opt = after_inline_opt;
2471 return def_prog_pass_constructor(
2472 &pass->pass, name ? name : "inline_functions",
2473 inline_functions_wrapper);
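/*
 * Example use (editor's sketch; 750 and 0 are placeholder values, not
 * tuned recommendations):
 *
 *   ir_prog_pass_t *p = inline_functions_pass(NULL, 750, 0, NULL);
 *   // ... schedule p with the ir_prog pass manager, or run the
 *   // optimization directly:
 *   inline_functions(750, 0, NULL);
 */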
2476 void firm_init_inline(void)
2478 FIRM_DBG_REGISTER(dbg, "firm.opt.inline");