[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #endif
29
30 #include <limits.h>
31 #include <assert.h>
32
33 #include "irnode_t.h"
34 #include "irgraph_t.h"
35 #include "irprog_t.h"
36
37 #include "iroptimize.h"
38 #include "ircons_t.h"
39 #include "iropt_t.h"
40 #include "irgopt.h"
41 #include "irgmod.h"
42 #include "irgwalk.h"
43
44 #include "adt/array.h"
45 #include "adt/list.h"
46 #include "adt/pset.h"
47 #include "adt/pmap.h"
48 #include "adt/pdeq.h"
49 #include "adt/xmalloc.h"
50 #include "adt/pqueue.h"
51
52 #include "irouts.h"
53 #include "irloop_t.h"
54 #include "irbackedge_t.h"
55 #include "opt_inline_t.h"
56 #include "cgana.h"
57 #include "trouts.h"
58 #include "error.h"
59
60 #include "analyze_irg_args.h"
61 #include "iredges_t.h"
62 #include "irflag_t.h"
63 #include "irhooks.h"
64 #include "irtools.h"
65 #include "iropt_dbg.h"
66
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
68
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection  */
71 /* of the obstack.                                                  */
72 /*------------------------------------------------------------------*/
73
74 /**
75  * Remember the new node in the old node by using a field all nodes have.
76  */
77 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
78
79 /**
80  * Get this new node, before the old node is forgotten.
81  */
82 #define get_new_node(oldn) get_irn_link(oldn)
83
84 /**
85  * Check if a new node was set.
86  */
87 #define has_new_node(n) (get_new_node(n) != NULL)
88
89 /**
90  * We use the block_visited flag to mark that we have computed the
91  * number of useful predecessors for this block.
92  * Further we encode the new arity in this flag in the old blocks.
93  * Remembering the arity is useful, as it saves a lot of pointer
94  * accesses.  This function is called for all Phi and Block nodes
95  * in a Block.
96  */
97 static INLINE int
98 compute_new_arity(ir_node *b) {
99         int i, res, irn_arity;
100         int irg_v, block_v;
101
102         irg_v = get_irg_block_visited(current_ir_graph);
103         block_v = get_Block_block_visited(b);
104         if (block_v >= irg_v) {
105                 /* we computed the number of preds for this block and saved it in the
106                    block_v flag */
107                 return block_v - irg_v;
108         } else {
109                 /* compute the number of good predecessors */
110                 res = irn_arity = get_irn_arity(b);
111                 for (i = 0; i < irn_arity; i++)
112                         if (is_Bad(get_irn_n(b, i))) res--;
113                 /* save it in the flag. */
114                 set_Block_block_visited(b, irg_v + res);
115                 return res;
116         }
117 }
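/* Worked example of the encoding above (illustrative numbers only): with
   get_irg_block_visited(irg) == 100 and a block that has 3 non-Bad predecessors,
   block_visited is set to 100 + 3 = 103; a later call decodes the arity as
   103 - 100 = 3 without rescanning the predecessors. */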
118
119 /**
120  * Copies the node to the new obstack. The Ins of the new node point to
121  * the predecessors on the old obstack.  For block/phi nodes not all
122  * predecessors might be copied.  n->link points to the new node.
123  * For Phi and Block nodes the function allocates in-arrays with an arity
124  * only for useful predecessors.  The arity is determined by counting
125  * the non-bad predecessors of the block.
126  *
127  * @param n    The node to be copied
128  * @param env  if non-NULL, the node number attribute will be copied to the new node
129  *
130  * Note: Also used for loop unrolling.
131  */
132 static void copy_node(ir_node *n, void *env) {
133         ir_node *nn, *block;
134         int new_arity;
135         ir_op *op = get_irn_op(n);
136         (void) env;
137
138         if (op == op_Bad) {
139                 /* node copied already */
140                 return;
141         } else if (op == op_Block) {
142                 block = NULL;
143                 new_arity = compute_new_arity(n);
144                 n->attr.block.graph_arr = NULL;
145         } else {
146                 block = get_nodes_block(n);
147                 if (op == op_Phi) {
148                         new_arity = compute_new_arity(block);
149                 } else {
150                         new_arity = get_irn_arity(n);
151                 }
152         }
153         nn = new_ir_node(get_irn_dbg_info(n),
154                 current_ir_graph,
155                 block,
156                 op,
157                 get_irn_mode(n),
158                 new_arity,
159                 get_irn_in(n) + 1);
160         /* Copy the attributes.  These might point to additional data.  If this
161            was allocated on the old obstack, the pointers are now dangling.  This
162            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
163         if (op == op_Block) {
164                 /* we cannot allow blocks WITHOUT macroblock input */
165                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
166         }
167         copy_node_attr(n, nn);
168
169 #ifdef DEBUG_libfirm
170         {
171                 int copy_node_nr = env != NULL;
172                 if (copy_node_nr) {
173                         /* for easier debugging, we want to copy the node numbers too */
174                         nn->node_nr = n->node_nr;
175                 }
176         }
177 #endif
178
179         set_new_node(n, nn);
180         hook_dead_node_elim_subst(current_ir_graph, n, nn);
181 }
182
183 /**
184  * Copies new predecessors of old node to new node remembered in link.
185  * Spare the Bad predecessors of Phi and Block nodes.
186  */
187 static void copy_preds(ir_node *n, void *env) {
188         ir_node *nn, *block;
189         int i, j, irn_arity;
190         (void) env;
191
192         nn = get_new_node(n);
193
194         if (is_Block(n)) {
195                 /* copy the macro block header */
196                 ir_node *mbh = get_Block_MacroBlock(n);
197
198                 if (mbh == n) {
199                         /* this block is a macroblock header */
200                         set_Block_MacroBlock(nn, nn);
201                 } else {
202                         /* get the macro block header */
203                         ir_node *nmbh = get_new_node(mbh);
204                         assert(nmbh != NULL);
205                         set_Block_MacroBlock(nn, nmbh);
206                 }
207
208                 /* Don't copy Bad nodes. */
209                 j = 0;
210                 irn_arity = get_irn_arity(n);
211                 for (i = 0; i < irn_arity; i++) {
212                         if (! is_Bad(get_irn_n(n, i))) {
213                                 ir_node *pred = get_irn_n(n, i);
214                                 set_irn_n(nn, j, get_new_node(pred));
215                                 j++;
216                         }
217                 }
218                 /* repair the block visited flag from above misuse. Repair it in both
219                    graphs so that the old one can still be used. */
220                 set_Block_block_visited(nn, 0);
221                 set_Block_block_visited(n, 0);
222                 /* Local optimization could not merge two subsequent blocks if
223                    the in array contained Bads.  Now it's possible.
224                    We don't call optimize_in_place as it requires
225                    that the fields in ir_graph are set properly. */
226                 if ((get_opt_control_flow_straightening()) &&
227                         (get_Block_n_cfgpreds(nn) == 1) &&
228                         is_Jmp(get_Block_cfgpred(nn, 0))) {
229                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
230                         if (nn == old) {
231                                 /* Jmp jumps into the block it is in -- deal with the self cycle. */
232                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
233                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
234                         } else {
235                                 exchange(nn, old);
236                         }
237                 }
238         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
239                 /* Don't copy node if corresponding predecessor in block is Bad.
240                    The Block itself should not be Bad. */
241                 block = get_nodes_block(n);
242                 set_nodes_block(nn, get_new_node(block));
243                 j = 0;
244                 irn_arity = get_irn_arity(n);
245                 for (i = 0; i < irn_arity; i++) {
246                         if (! is_Bad(get_irn_n(block, i))) {
247                                 ir_node *pred = get_irn_n(n, i);
248                                 set_irn_n(nn, j, get_new_node(pred));
249                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
250                                 j++;
251                         }
252                 }
253                 /* If the pre walker reached this Phi after the post walker visited the
254                    block, block_visited is > 0. */
255                 set_Block_block_visited(get_nodes_block(n), 0);
256                 /* Compacting the Phi's ins might generate Phis with only one
257                    predecessor. */
258                 if (get_irn_arity(nn) == 1)
259                         exchange(nn, get_irn_n(nn, 0));
260         } else {
261                 irn_arity = get_irn_arity(n);
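                /* plain node: copy all predecessors; index -1 transfers the block reference */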
262                 for (i = -1; i < irn_arity; i++)
263                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
264         }
265         /* Now the new node is complete.  We can add it to the hash table for CSE.
266            @@@ inlining aborts if we identify End. Why? */
267         if (!is_End(nn))
268                 add_identities(current_ir_graph->value_table, nn);
269 }
270
271 /**
272  * Copies the graph recursively, compacts the keep-alives of the end node.
273  *
274  * @param irg           the graph to be copied
275  * @param copy_node_nr  If non-zero, the node number will be copied
276  */
277 static void copy_graph(ir_graph *irg, int copy_node_nr) {
278         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
279         ir_node *ka;      /* keep alive */
280         int i, irn_arity;
281         unsigned long vfl;
282
283         /* Some nodes must be copied by hand, sigh */
284         vfl = get_irg_visited(irg);
285         set_irg_visited(irg, vfl + 1);
286
287         oe = get_irg_end(irg);
288         mark_irn_visited(oe);
289         /* copy the end node by hand, allocate dynamic in array! */
290         ne = new_ir_node(get_irn_dbg_info(oe),
291                 irg,
292                 NULL,
293                 op_End,
294                 mode_X,
295                 -1,
296                 NULL);
297         /* Copy the attributes.  Well, there might be some in the future... */
298         copy_node_attr(oe, ne);
299         set_new_node(oe, ne);
300
301         /* copy the Bad node */
302         ob = get_irg_bad(irg);
303         mark_irn_visited(ob);
304         nb = new_ir_node(get_irn_dbg_info(ob),
305                 irg,
306                 NULL,
307                 op_Bad,
308                 mode_T,
309                 0,
310                 NULL);
311         copy_node_attr(ob, nb);
312         set_new_node(ob, nb);
313
314         /* copy the NoMem node */
315         om = get_irg_no_mem(irg);
316         mark_irn_visited(om);
317         nm = new_ir_node(get_irn_dbg_info(om),
318                 irg,
319                 NULL,
320                 op_NoMem,
321                 mode_M,
322                 0,
323                 NULL);
324         copy_node_attr(om, nm);
325         set_new_node(om, nm);
326
327         /* copy the live nodes */
328         set_irg_visited(irg, vfl);
329         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
330
331         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
332
333         /* visit the anchors as well */
334         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
335                 ir_node *n = get_irg_anchor(irg, i);
336
337                 if (n && (get_irn_visited(n) <= vfl)) {
338                         set_irg_visited(irg, vfl);
339                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
340                 }
341         }
342
343         /* copy_preds for the end node ... */
344         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
345
346         /*- ... and now the keep alives. -*/
347         /* First pick the unmarked block nodes and walk them.  We must pick these
348            first, as otherwise we will overlook blocks reachable from Phis. */
349         irn_arity = get_End_n_keepalives(oe);
350         for (i = 0; i < irn_arity; i++) {
351                 ka = get_End_keepalive(oe, i);
352                 if (is_Block(ka)) {
353                         if (get_irn_visited(ka) <= vfl) {
354                                 /* We must keep the block alive and copy everything reachable */
355                                 set_irg_visited(irg, vfl);
356                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
357                         }
358                         add_End_keepalive(ne, get_new_node(ka));
359                 }
360         }
361
362         /* Now pick other nodes.  Here we will keep all! */
363         irn_arity = get_End_n_keepalives(oe);
364         for (i = 0; i < irn_arity; i++) {
365                 ka = get_End_keepalive(oe, i);
366                 if (!is_Block(ka)) {
367                         if (get_irn_visited(ka) <= vfl) {
368                                 /* We didn't copy the node yet.  */
369                                 set_irg_visited(irg, vfl);
370                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
371                         }
372                         add_End_keepalive(ne, get_new_node(ka));
373                 }
374         }
375
376         /* start block sometimes only reached after keep alives */
377         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
378         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
379 }
380
381 /**
382  * Copies the graph reachable from current_ir_graph->end to the obstack
383  * in current_ir_graph and fixes the environment.
384  * Then fixes the fields in current_ir_graph containing nodes of the
385  * graph.
386  *
387  * @param copy_node_nr  If non-zero, the node number will be copied
388  */
389 static void
390 copy_graph_env(int copy_node_nr) {
391         ir_graph *irg = current_ir_graph;
392         ir_node *old_end, *new_anchor;
393         int i;
394
395         /* remove end_except and end_reg nodes */
396         old_end = get_irg_end(irg);
397         set_irg_end_except (irg, old_end);
398         set_irg_end_reg    (irg, old_end);
399
400         /* Not all nodes remembered in irg might be reachable
401            from the end node.  Assure their link is set to NULL, so that
402            we can test whether new nodes have been computed. */
403         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
404                 ir_node *n = get_irg_anchor(irg, i);
405                 if (n != NULL)
406                         set_new_node(n, NULL);
407         }
408         /* we use the block walk flag for removing Bads from Blocks ins. */
409         inc_irg_block_visited(irg);
410
411         /* copy the graph */
412         copy_graph(irg, copy_node_nr);
413
414         /* fix the anchor */
415         old_end    = get_irg_end(irg);
416         new_anchor = new_Anchor(irg);
417
418         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
419                 ir_node *n = get_irg_anchor(irg, i);
420                 if (n)
421                         set_irn_n(new_anchor, i, get_new_node(n));
422         }
423         free_End(old_end);
424         irg->anchor = new_anchor;
425
426         /* ensure the new anchor is placed in the endblock */
427         set_nodes_block(new_anchor, get_irg_end_block(irg));
428 }
429
430 /**
431  * Copies all reachable nodes to a new obstack.  Removes bad inputs
432  * from block nodes and the corresponding inputs from Phi nodes.
433  * Merges single exit blocks with single entry blocks and removes
434  * 1-input Phis.
435  * Adds all new nodes to a new hash table for CSE.  Does not
436  * perform CSE, so the hash table might contain common subexpressions.
437  */
438 void dead_node_elimination(ir_graph *irg) {
439         ir_graph *rem;
440 #ifdef INTERPROCEDURAL_VIEW
441         int rem_ipview = get_interprocedural_view();
442 #endif
443         struct obstack *graveyard_obst = NULL;
444         struct obstack *rebirth_obst   = NULL;
445
446         edges_deactivate(irg);
447
448         /* inform statistics that we started a dead-node elimination run */
449         hook_dead_node_elim(irg, 1);
450
451         /* Remember external state of current_ir_graph. */
452         rem = current_ir_graph;
453         current_ir_graph = irg;
454 #ifdef INTERPROCEDURAL_VIEW
455         set_interprocedural_view(0);
456 #endif
457
458         assert(get_irg_phase_state(irg) != phase_building);
459
460         /* Handle graph state */
461         free_callee_info(irg);
462         free_irg_outs(irg);
463         free_trouts();
464
465         /* @@@ so far we lose loops when copying */
466         free_loop_information(irg);
467
468         set_irg_doms_inconsistent(irg);
469
470         /* A quiet place, where the old obstack can rest in peace,
471            until it will be cremated. */
472         graveyard_obst = irg->obst;
473
474         /* A new obstack, where the reachable nodes will be copied to. */
475         rebirth_obst = xmalloc(sizeof(*rebirth_obst));
476         irg->obst = rebirth_obst;
477         obstack_init(irg->obst);
478         irg->last_node_idx = 0;
479
480         /* We also need a new value table for CSE */
481         del_identities(irg->value_table);
482         irg->value_table = new_identities();
483
484         /* Copy the graph from the old to the new obstack */
485         copy_graph_env(/*copy_node_nr=*/1);
486
487         /* Free memory from old unoptimized obstack */
488         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
489         xfree(graveyard_obst);            /* ... then free it.           */
490
491         /* inform statistics that the run is over */
492         hook_dead_node_elim(irg, 0);
493
494         current_ir_graph = rem;
495 #ifdef INTERPROCEDURAL_VIEW
496         set_interprocedural_view(rem_ipview);
497 #endif
498 }
499
500 /**
501  * Relink bad predecessors of a block and store the old in array in the
502  * link field. This function is called by relink_bad_predecessors().
503  * The array stored in the link field starts with the block operand at position 0.
504  * If the block has bad predecessors, create a new in array without bad preds.
505  * Otherwise leave the in array untouched.
506  */
507 static void relink_bad_block_predecessors(ir_node *n, void *env) {
508         ir_node **new_in, *irn;
509         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
510         (void) env;
511
512         /* if link field of block is NULL, look for bad predecessors otherwise
513            this is already done */
514         if (is_Block(n) && get_irn_link(n) == NULL) {
515                 /* save old predecessors in link field (position 0 is the block operand)*/
516                 set_irn_link(n, get_irn_in(n));
517
518                 /* count predecessors without bad nodes */
519                 old_irn_arity = get_irn_arity(n);
520                 for (i = 0; i < old_irn_arity; i++)
521                         if (!is_Bad(get_irn_n(n, i)))
522                                 ++new_irn_arity;
523
524                 /* arity changing: set new predecessors without bad nodes */
525                 if (new_irn_arity < old_irn_arity) {
526                         /* Get new predecessor array. We do not resize the array, as we must
527                            keep the old one to update Phis. */
528                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
529
530                         /* set new predecessors in array */
531                         new_in[0] = NULL;
532                         new_irn_n = 1;
533                         for (i = 0; i < old_irn_arity; i++) {
534                                 irn = get_irn_n(n, i);
535                                 if (!is_Bad(irn)) {
536                                         new_in[new_irn_n] = irn;
537                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
538                                         ++new_irn_n;
539                                 }
540                         }
541                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
542                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
543                         n->in = new_in;
544                 } /* ir node has bad predecessors */
545         } /* Block is not relinked */
546 }
547
548 /**
549  * Relinks Bad predecessors of Blocks and Phis; called by the walker
550  * remove_bad_predecessors(). If n is a Block, call
551  * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
552  * function for the Phi's Block. If this block has bad predecessors, relink the
553  * preds of the Phi node.
554  */
555 static void relink_bad_predecessors(ir_node *n, void *env) {
556         ir_node *block, **old_in;
557         int i, old_irn_arity, new_irn_arity;
558
559         /* relink bad predecessors of a block */
560         if (is_Block(n))
561                 relink_bad_block_predecessors(n, env);
562
563         /* If Phi node relink its block and its predecessors */
564         if (is_Phi(n)) {
565                 /* Relink predecessors of phi's block */
566                 block = get_nodes_block(n);
567                 if (get_irn_link(block) == NULL)
568                         relink_bad_block_predecessors(block, env);
569
570                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
571                 old_irn_arity = ARR_LEN(old_in);
572
573                 /* Relink Phi predecessors if count of predecessors changed */
574                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
575                         /* set new predecessors in array
576                            n->in[0] remains the same block */
577                         new_irn_arity = 1;
578                         for(i = 1; i < old_irn_arity; i++)
579                                 if (!is_Bad(old_in[i])) {
580                                         n->in[new_irn_arity] = n->in[i];
581                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
582                                         ++new_irn_arity;
583                                 }
584
585                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
586                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
587                 }
588         } /* n is a Phi node */
589 }
590
591 /*
592  * Removes Bad predecessors from Blocks and the corresponding
593  * inputs to Phi nodes as in dead_node_elimination but without
594  * copying the graph.
595  * On walking up set the link field to NULL, on walking down call
596  * relink_bad_predecessors() (This function stores the old in array
597  * to the link field and sets a new in array if arity of predecessors
598  * changes).
599  */
600 void remove_bad_predecessors(ir_graph *irg) {
601         panic("Fix backedge handling first");
602         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
603 }
604
605
606 /*
607    __                      _  __ __
608   (_     __    o     _    | \/  |_
609   __)|_| | \_/ | \_/(/_   |_/\__|__
610
611   The following stuff implements a facility that automatically patches
612   registered ir_node pointers to the new node when a dead node elimination occurs.
613 */
614
615 struct _survive_dce_t {
616         struct obstack obst;
617         pmap *places;
618         pmap *new_places;
619         hook_entry_t dead_node_elim;
620         hook_entry_t dead_node_elim_subst;
621 };
622
623 typedef struct _survive_dce_list_t {
624         struct _survive_dce_list_t *next;
625         ir_node **place;
626 } survive_dce_list_t;
627
628 static void dead_node_hook(void *context, ir_graph *irg, int start) {
629         survive_dce_t *sd = context;
630         (void) irg;
631
632         /* Create a new map before the dead node elimination is performed. */
633         if (start) {
634                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
635         } else {
636                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
637                 pmap_destroy(sd->places);
638                 sd->places     = sd->new_places;
639                 sd->new_places = NULL;
640         }
641 }
642
643 /**
644  * Hook called when dead node elimination replaces old by nw.
645  */
646 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
647         survive_dce_t *sd = context;
648         survive_dce_list_t *list = pmap_get(sd->places, old);
649         (void) irg;
650
651         /* If the node is to be patched back, write the new address to all registered locations. */
652         if (list) {
653                 survive_dce_list_t *p;
654
655                 for (p = list; p; p = p->next)
656                         *(p->place) = nw;
657
658                 pmap_insert(sd->new_places, nw, list);
659         }
660 }
661
662 /**
663  * Make a new Survive DCE environment.
664  */
665 survive_dce_t *new_survive_dce(void) {
666         survive_dce_t *res = xmalloc(sizeof(res[0]));
667         obstack_init(&res->obst);
668         res->places     = pmap_create();
669         res->new_places = NULL;
670
671         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
672         res->dead_node_elim.context                   = res;
673         res->dead_node_elim.next                      = NULL;
674
675         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
676         res->dead_node_elim_subst.context = res;
677         res->dead_node_elim_subst.next    = NULL;
678
679 #ifndef FIRM_ENABLE_HOOKS
680         assert(0 && "need hooks enabled");
681 #endif
682
683         register_hook(hook_dead_node_elim, &res->dead_node_elim);
684         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
685         return res;
686 }
687
688 /**
689  * Free a Survive DCE environment.
690  */
691 void free_survive_dce(survive_dce_t *sd) {
692         obstack_free(&sd->obst, NULL);
693         pmap_destroy(sd->places);
694         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
695         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
696         xfree(sd);
697 }
698
699 /**
700  * Register a node pointer to be patched upon DCE.
701  * When DCE occurs, the node pointer specified by @p place will be
702  * patched to the new address of the node it is pointing to.
703  *
704  * @param sd    The Survive DCE environment.
705  * @param place The address of the node pointer.
706  */
707 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
708         if (*place != NULL) {
709                 ir_node *irn      = *place;
710                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
711                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
712
713                 nw->next  = curr;
714                 nw->place = place;
715
716                 pmap_insert(sd->places, irn, nw);
717         }
718 }
719
720 /*--------------------------------------------------------------------*/
721 /*  Functionality for inlining                                         */
722 /*--------------------------------------------------------------------*/
723
724 /**
725  * Copy node for inlining.  Updates attributes that change when
726  * inlining but not for dead node elimination.
727  *
728  * Copies the node by calling copy_node() and then updates the entity if
729  * it's a local one.  env must be a pointer to the frame type of the
730  * inlined procedure. The new entities must be in the link field of
731  * the entities.
732  */
733 static void copy_node_inline(ir_node *n, void *env) {
734         ir_node *nn;
735         ir_type *frame_tp = (ir_type *)env;
736
737         copy_node(n, NULL);
738         if (is_Sel(n)) {
739                 nn = get_new_node (n);
740                 assert(is_Sel(nn));
741                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
742                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
743                 }
744         } else if (is_Block(n)) {
745                 nn = get_new_node (n);
746                 nn->attr.block.irg = current_ir_graph;
747         }
748 }
749
750 /**
751  * Copies the new predecessors of the old node and moves constants to
752  * the Start Block.
753  */
754 static void copy_preds_inline(ir_node *n, void *env) {
755         ir_node *nn;
756
757         copy_preds(n, env);
758         nn = skip_Id(get_new_node(n));
759         if (is_irn_constlike(nn)) {
760                 /* move Constants into the start block */
761                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
762
763                 n = identify_remember(current_ir_graph->value_table, nn);
764                 if (nn != n) {
765                         DBG_OPT_CSE(nn, n);
766                         exchange(nn, n);
767                 }
768         }
769 }
770
771 /**
772  * Walker: checks if P_value_arg_base is used.
773  */
774 static void find_addr(ir_node *node, void *env) {
775         int *allow_inline = env;
776         if (is_Proj(node) &&
777                         is_Start(get_Proj_pred(node)) &&
778                         get_Proj_proj(node) == pn_Start_P_value_arg_base) {
779                 *allow_inline = 0;
780         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
781                 /* From GCC:
782                  * Refuse to inline alloca call unless user explicitly forced so as this
783                  * may change program's memory overhead drastically when the function
784                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
785                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
786                  *
787                  * Sadly, this is true for our implementation as well.
788                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
789                  * disables inlining of functions using VLA (which are completely safe).
790                  *
791                  * 2 Solutions:
792                  * - add a flag to the Alloc node for "real" alloca() calls
793                  * - add a new Stack-Restore node at the end of a function using alloca()
794                  */
795                 *allow_inline = 0;
796         }
797 }
798
799 /**
800  * Check if we can inline a given call.
801  * Currently, we cannot inline two cases:
802  * - call with compound arguments
803  * - graphs that take the address of a parameter
804  *
805  * These conditions are checked here.
806  */
807 static int can_inline(ir_node *call, ir_graph *called_graph) {
808         ir_type *call_type = get_Call_type(call);
809         int params, ress, i, res;
810         assert(is_Method_type(call_type));
811
812         params = get_method_n_params(call_type);
813         ress   = get_method_n_ress(call_type);
814
815         /* check parameters for compound arguments */
816         for (i = 0; i < params; ++i) {
817                 ir_type *p_type = get_method_param_type(call_type, i);
818
819                 if (is_compound_type(p_type))
820                         return 0;
821         }
822
823         /* check results for compound arguments */
824         for (i = 0; i < ress; ++i) {
825                 ir_type *r_type = get_method_res_type(call_type, i);
826
827                 if (is_compound_type(r_type))
828                         return 0;
829         }
830
831         res = 1;
832         irg_walk_graph(called_graph, find_addr, NULL, &res);
833
834         return res;
835 }
836
837 enum exc_mode {
838         exc_handler    = 0, /**< There is a handler. */
839         exc_to_end     = 1, /**< Branches to End. */
840         exc_no_handler = 2  /**< Exception handling not represented. */
841 };
842
843 /* Inlines a method at the given call site. */
844 int inline_method(ir_node *call, ir_graph *called_graph) {
845         ir_node             *pre_call;
846         ir_node             *post_call, *post_bl;
847         ir_node             *in[pn_Start_max];
848         ir_node             *end, *end_bl, *block;
849         ir_node             **res_pred;
850         ir_node             **cf_pred;
851         ir_node             **args_in;
852         ir_node             *ret, *phi;
853         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
854         enum exc_mode       exc_handling;
855         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
856         ir_entity           *ent;
857         ir_graph            *rem, *irg;
858         irg_inline_property prop = get_irg_inline_property(called_graph);
859         unsigned long       visited;
860
861         if (prop == irg_inline_forbidden)
862                 return 0;
863
864         ent = get_irg_entity(called_graph);
865
866         mtp = get_entity_type(ent);
867         ctp = get_Call_type(call);
868         if (get_method_n_params(mtp) > get_method_n_params(ctp)) {
869                 /* this is a bad feature of C: without a prototype, we can call a function with fewer
870                 parameters than needed. Currently we don't support this, although it would be
871                 possible to use Unknown for the missing parameters. */
872                 return 0;
873         }
874
875         /* Argh, compiling C has some bad consequences:
876            the call type AND the method type might be different.
877            It is implementation dependent what happens in that case.
878            We support inlining if the bit sizes of the types match AND
879            the same arithmetic is used. */
880         n_params = get_method_n_params(mtp);
881         for (i = n_params - 1; i >= 0; --i) {
882                 ir_type *param_tp = get_method_param_type(mtp, i);
883                 ir_type *arg_tp   = get_method_param_type(ctp, i);
884
885                 if (param_tp != arg_tp) {
886                         ir_mode *pmode = get_type_mode(param_tp);
887                         ir_mode *amode = get_type_mode(arg_tp);
888
889                         if (pmode == NULL || amode == NULL)
890                                 return 0;
891                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
892                                 return 0;
893                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
894                                 return 0;
895                         /* otherwise we can simply "reinterpret" the bits */
896                 }
897         }
898
899         irg = get_irn_irg(call);
900
901         /*
902          * We cannot inline a recursive call. The graph must be copied before
903          * the call to inline_method() using create_irg_copy().
904          */
905         if (called_graph == irg)
906                 return 0;
907
908         /*
909          * currently, we cannot inline two cases:
910          * - call with compound arguments
911          * - graphs that take the address of a parameter
912          */
913         if (! can_inline(call, called_graph))
914                 return 0;
915
916         rem = current_ir_graph;
917         current_ir_graph = irg;
918
919         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
920
921         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
922         rem_opt = get_opt_optimize();
923         set_optimize(0);
924
925         /* Handle graph state */
926         assert(get_irg_phase_state(irg) != phase_building);
927         assert(get_irg_pinned(irg) == op_pin_state_pinned);
928         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
929         set_irg_outs_inconsistent(irg);
930         set_irg_extblk_inconsistent(irg);
931         set_irg_doms_inconsistent(irg);
932         set_irg_loopinfo_inconsistent(irg);
933         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
934
935         /* -- Check preconditions -- */
936         assert(is_Call(call));
937
938         /* here we know we WILL inline, so inform the statistics */
939         hook_inline(call, called_graph);
940
941         /* -- Decide how to handle exception control flow: Is there a handler
942            for the Call node, or do we branch directly to End on an exception?
943            exc_handling:
944            0 There is a handler.
945            1 Branches to End.
946            2 Exception handling not represented in Firm. -- */
947         {
948                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
949                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
950                         long proj_nr = get_Proj_proj(proj);
951                         if (proj_nr == pn_Call_X_except) Xproj = proj;
952                         if (proj_nr == pn_Call_M_except) Mproj = proj;
953                 }
954                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
955                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
956                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
957         }
958
959         /* create the argument tuple */
960         NEW_ARR_A(ir_node *, args_in, n_params);
961
962         block = get_nodes_block(call);
963         for (i = n_params - 1; i >= 0; --i) {
964                 ir_node *arg      = get_Call_param(call, i);
965                 ir_type *param_tp = get_method_param_type(mtp, i);
966                 ir_mode *mode     = get_type_mode(param_tp);
967
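                /* the modes differ, but the check above guarantees equal bit size and
                   arithmetic, so a Conv merely reinterprets the bits */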
968                 if (mode != get_irn_mode(arg)) {
969                         arg = new_r_Conv(irg, block, arg, mode);
970                 }
971                 args_in[i] = arg;
972         }
973
974         /* -- Pre_call collects the entry state (memory, frame, arguments) of
975            the procedure and later replaces the Start node of the called graph.
976            Post_call is the old Call node and collects the results of the called
977            graph. Both will end up being a tuple.  -- */
978         post_bl = get_nodes_block(call);
979         set_irg_current_block(irg, post_bl);
980         /* XxMxPxPxPxT of Start + parameter of Call */
981         in[pn_Start_X_initial_exec]   = new_Jmp();
982         in[pn_Start_M]                = get_Call_mem(call);
983         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
984         in[pn_Start_P_tls]            = get_irg_tls(irg);
985         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
986         /* in[pn_Start_P_value_arg_base] = ??? */
987         assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
988         pre_call = new_Tuple(pn_Start_max - 1, in);
989         post_call = call;
990
991         /* --
992            The new block gets the ins of the old block, pre_call and all its
993            predecessors and all Phi nodes. -- */
994         part_block(pre_call);
995
996         /* -- Prepare state for dead node elimination -- */
997         /* Visited flags in calling irg must be >= flag in called irg.
998            Else walker and arity computation will not work. */
999         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1000                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1001         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1002                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1003         visited = get_irg_visited(irg);
1004
1005         /* Set pre_call as new Start node in the link field of the start node of
1006            the called graph and pre_call's block as new block for the start block
1007            of the called graph.
1008            Further mark these nodes so that they are not visited by the
1009            copying. */
1010         set_irn_link(get_irg_start(called_graph), pre_call);
1011         set_irn_visited(get_irg_start(called_graph), visited);
1012         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1013         set_irn_visited(get_irg_start_block(called_graph), visited);
1014
1015         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1016         set_irn_visited(get_irg_bad(called_graph), visited);
1017
1018         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1019         set_irn_visited(get_irg_no_mem(called_graph), visited);
1020
1021         /* Initialize for compaction of in arrays */
1022         inc_irg_block_visited(irg);
1023
1024         /* -- Replicate local entities of the called_graph -- */
1025         /* copy the entities. */
1026         called_frame = get_irg_frame_type(called_graph);
1027         curr_frame   = get_irg_frame_type(irg);
1028         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1029                 ir_entity *new_ent, *old_ent;
1030                 old_ent = get_class_member(called_frame, i);
1031                 new_ent = copy_entity_own(old_ent, curr_frame);
1032                 set_entity_link(old_ent, new_ent);
1033         }
1034
1035         /* visited is > than that of the called graph.  With this trick visited will
1036            remain unchanged, so that an outer walker (e.g. one searching for call nodes
1037            to inline) that triggered this inlining will not visit the inlined nodes. */
1038         set_irg_visited(irg, get_irg_visited(irg)-1);
1039
1040         /* -- Performing dead node elimination inlines the graph -- */
1041         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1042            entities. */
1043         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1044                  get_irg_frame_type(called_graph));
1045
1046         /* Repair called_graph */
1047         set_irg_visited(called_graph, get_irg_visited(irg));
1048         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1049         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1050
1051         /* -- Merge the end of the inlined procedure with the call site -- */
1052         /* We will turn the old Call node into a Tuple with the following
1053            predecessors:
1054            -1:  Block of Tuple.
1055            0: Phi of all Memories of Return statements.
1056            1: Jmp from new Block that merges the control flow from all exception
1057            predecessors of the old end block.
1058            2: Tuple of the results of the Return statements.
1059            3: Phi of Exception memories.
1060            In case the old Call directly branches to End on an exception we don't
1061            need the block merging all exceptions nor the Phi of the exception
1062            memories.
1063         */
1064
1065         /* -- Precompute some values -- */
1066         end_bl = get_new_node(get_irg_end_block(called_graph));
1067         end = get_new_node(get_irg_end(called_graph));
1068         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1069         n_res = get_method_n_ress(get_Call_type(call));
1070
1071         res_pred = xmalloc(n_res * sizeof(*res_pred));
1072         cf_pred  = xmalloc(arity * sizeof(*cf_pred));
1073
1074         set_irg_current_block(irg, post_bl); /* just to make sure */
1075
1076         /* -- archive keepalives -- */
1077         irn_arity = get_irn_arity(end);
1078         for (i = 0; i < irn_arity; i++) {
1079                 ir_node *ka = get_End_keepalive(end, i);
1080                 if (! is_Bad(ka))
1081                         add_End_keepalive(get_irg_end(irg), ka);
1082         }
1083
1084         /* The new end node will die.  We need not free as the in array is on the obstack:
1085            copy_node() only generated 'D' arrays. */
1086
1087         /* -- Replace Return nodes by Jump nodes. -- */
1088         n_ret = 0;
1089         for (i = 0; i < arity; i++) {
1090                 ir_node *ret;
1091                 ret = get_Block_cfgpred(end_bl, i);
1092                 if (is_Return(ret)) {
1093                         cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
1094                         n_ret++;
1095                 }
1096         }
1097         set_irn_in(post_bl, n_ret, cf_pred);
1098
1099         /* -- Build a Tuple for all results of the method.
1100            Add Phi node if there was more than one Return.  -- */
1101         turn_into_tuple(post_call, pn_Call_max);
1102         /* First the Memory-Phi */
1103         n_ret = 0;
1104         for (i = 0; i < arity; i++) {
1105                 ret = get_Block_cfgpred(end_bl, i);
1106                 if (is_Return(ret)) {
1107                         cf_pred[n_ret] = get_Return_mem(ret);
1108                         n_ret++;
1109                 }
1110         }
1111         phi = new_Phi(n_ret, cf_pred, mode_M);
1112         set_Tuple_pred(call, pn_Call_M_regular, phi);
1113         /* Conserve Phi-list for further inlinings -- but might be optimized */
1114         if (get_nodes_block(phi) == post_bl) {
1115                 set_irn_link(phi, get_irn_link(post_bl));
1116                 set_irn_link(post_bl, phi);
1117         }
1118         /* Now the real results */
1119         if (n_res > 0) {
1120                 for (j = 0; j < n_res; j++) {
1121                         n_ret = 0;
1122                         for (i = 0; i < arity; i++) {
1123                                 ret = get_Block_cfgpred(end_bl, i);
1124                                 if (is_Return(ret)) {
1125                                         cf_pred[n_ret] = get_Return_res(ret, j);
1126                                         n_ret++;
1127                                 }
1128                         }
1129                         if (n_ret > 0)
1130                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1131                         else
1132                                 phi = new_Bad();
1133                         res_pred[j] = phi;
1134                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1135                         if (get_nodes_block(phi) == post_bl) {
1136                                 set_Phi_next(phi, get_Block_phis(post_bl));
1137                                 set_Block_phis(post_bl, phi);
1138                         }
1139                 }
1140                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1141         } else {
1142                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1143         }
1144         /* handle the regular call */
1145         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1146
1147         /* For now, we cannot inline calls with value_base */
1148         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1149
1150         /* Finally the exception control flow.
1151            We have two (three) possible situations:
1152            First if the Call branches to an exception handler: We need to add a Phi node to
1153            collect the memory containing the exception objects.  Further we need
1154            to add another block to get a correct representation of this Phi.  To
1155            this block we add a Jmp that resolves into the X output of the Call
1156            when the Call is turned into a tuple.
1157            Second the Call branches to End, the exception is not handled.  Just
1158            add all inlined exception branches to the End node.
1159            Third: there is no Exception edge at all. Handle as case two. */
1160         if (exc_handling == exc_handler) {
1161                 n_exc = 0;
1162                 for (i = 0; i < arity; i++) {
1163                         ir_node *ret, *irn;
1164                         ret = get_Block_cfgpred(end_bl, i);
1165                         irn = skip_Proj(ret);
1166                         if (is_fragile_op(irn) || is_Raise(irn)) {
1167                                 cf_pred[n_exc] = ret;
1168                                 ++n_exc;
1169                         }
1170                 }
1171                 if (n_exc > 0) {
1172                         new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
1173                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1174                         /* The Phi for the memories with the exception objects */
1175                         n_exc = 0;
1176                         for (i = 0; i < arity; i++) {
1177                                 ir_node *ret;
1178                                 ret = skip_Proj(get_Block_cfgpred(end_bl, i));
1179                                 if (is_Call(ret)) {
1180                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
1181                                         n_exc++;
1182                                 } else if (is_fragile_op(ret)) {
1183                                         /* We rely on the fact that all cfops have the memory output at the same position. */
1184                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
1185                                         n_exc++;
1186                                 } else if (is_Raise(ret)) {
1187                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
1188                                         n_exc++;
1189                                 }
1190                         }
1191                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1192                 } else {
1193                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1194                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1195                 }
1196         } else {
1197                 ir_node *main_end_bl;
1198                 int main_end_bl_arity;
1199                 ir_node **end_preds;
1200
1201                 /* assert(exc_handling == 1 || no exceptions. ) */
1202                 n_exc = 0;
1203                 for (i = 0; i < arity; i++) {
1204                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1205                         ir_node *irn = skip_Proj(ret);
1206
1207                         if (is_fragile_op(irn) || is_Raise(irn)) {
1208                                 cf_pred[n_exc] = ret;
1209                                 n_exc++;
1210                         }
1211                 }
1212                 main_end_bl = get_irg_end_block(irg);
1213                 main_end_bl_arity = get_irn_arity(main_end_bl);
1214                 end_preds =  xmalloc((n_exc + main_end_bl_arity) * sizeof(*end_preds));
1215
1216                 for (i = 0; i < main_end_bl_arity; ++i)
1217                         end_preds[i] = get_irn_n(main_end_bl, i);
1218                 for (i = 0; i < n_exc; ++i)
1219                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1220                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1221                 set_Tuple_pred(call, pn_Call_X_except,  new_Bad());
1222                 set_Tuple_pred(call, pn_Call_M_except,  new_Bad());
1223                 free(end_preds);
1224         }
1225         free(res_pred);
1226         free(cf_pred);
1227
1228         /* --  Turn CSE back on. -- */
1229         set_optimize(rem_opt);
1230         current_ir_graph = rem;
1231
1232         return 1;
1233 }
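/*
 * Usage sketch for inline_method() (call/callee names are illustrative; see
 * inline_small_irgs() below for a real caller): the caller graph must have its
 * Phi and Proj lists collected first, since the exception Projs of the Call are
 * found via the link fields.
 *
 *   collect_phiprojs(irg);
 *   if (inline_method(call, callee))
 *           ;  // the Call is now a Tuple and callee's body was copied into irg
 */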
1234
1235 /********************************************************************/
1236 /* Apply inlining to small methods.                                 */
1237 /********************************************************************/
1238
1239 static struct obstack  temp_obst;
1240
1241 /** Represents a possible inlinable call in a graph. */
1242 typedef struct _call_entry {
1243         ir_node    *call;       /**< the Call node */
1244         ir_graph   *callee;     /**< the callee IR-graph called here */
1245         list_head  list;        /**< for linking the next one */
1246         int        loop_depth;  /**< the loop depth of this call */
1247         int        benefice;    /**< calculated benefice of this call */
1248         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1249         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1250 } call_entry;
1251
1252 /**
1253  * environment for inlining small irgs
1254  */
1255 typedef struct _inline_env_t {
1256         struct obstack obst;  /**< an obstack where call_entries are allocated on. */
1257         list_head      calls; /**< the call entry list */
1258 } inline_env_t;
1259
1260 /**
1261  * Returns the irg called from a Call node. If the irg is not
1262  * known, NULL is returned.
1263  *
1264  * @param call  the call node
1265  */
1266 static ir_graph *get_call_called_irg(ir_node *call) {
1267         ir_node *addr;
1268
1269         addr = get_Call_ptr(call);
1270         if (is_Global(addr)) {
1271                 ir_entity *ent = get_Global_entity(addr);
1272                 return get_entity_irg(ent);
1273         }
1274
1275         return NULL;
1276 }
1277
1278 /**
1279  * Walker: Collect all calls to known graphs inside a graph.
1280  */
1281 static void collect_calls(ir_node *call, void *env) {
1282         (void) env;
1283         if (is_Call(call)) {
1284                 ir_graph *called_irg = get_call_called_irg(call);
1285
1286                 if (called_irg != NULL) {
1287                         /* The Call node calls a locally defined method.  Remember to inline. */
1288                         inline_env_t *ienv  = env;
1289                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1290                         entry->call       = call;
1291                         entry->callee     = called_irg;
1292                         entry->loop_depth = 0;
1293                         entry->benefice   = 0;
1294                         entry->local_adr  = 0;
1295                         entry->all_const  = 0;
1296
1297                         list_add_tail(&entry->list, &ienv->calls);
1298                 }
1299         }
1300 }
1301
1302 /**
1303  * Inlines all small methods at call sites where the called address comes
1304  * from a Const node that references the entity representing the called
1305  * method.
1306  * The size argument is a rough measure for the code size of the method:
1307  * Methods where the obstack containing the firm graph is smaller than
1308  * size are inlined.
1309  */
1310 void inline_small_irgs(ir_graph *irg, int size) {
1311         ir_graph *rem = current_ir_graph;
1312         inline_env_t env;
1313         call_entry *entry;
1314
1315         current_ir_graph = irg;
1316         /* Handle graph state */
1317         assert(get_irg_phase_state(irg) != phase_building);
1318         free_callee_info(irg);
1319
1320         /* Find Call nodes to inline.
1321            (We cannot inline during a walk of the graph, as inlining the same
1322            method several times changes the visited flag of the walked graph:
1323            after the first inlining the visited value of the callee equals that of
1324            the caller.  With the next inlining both are increased.) */
1325         obstack_init(&env.obst);
1326         INIT_LIST_HEAD(&env.calls);
1327         irg_walk_graph(irg, NULL, collect_calls, &env);
1328
1329         if (! list_empty(&env.calls)) {
1330                 /* There are calls to inline */
1331                 collect_phiprojs(irg);
1332
1333                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1334                         ir_graph            *callee = entry->callee;
1335                         irg_inline_property prop    = get_irg_inline_property(callee);
1336
1337                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1338                                 /* do not inline forbidden / weak graphs */
1339                                 continue;
1340                         }
1341
1342                         if (prop >= irg_inline_forced ||
1343                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1344                                 inline_method(entry->call, callee);
1345                         }
1346                 }
1347         }
1348         obstack_free(&env.obst, NULL);
1349         current_ir_graph = rem;
1350 }
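
/* Illustrative use (the size bound is a made-up value, not a tuned default):
 *
 *     inline_small_irgs(irg, 50);
 *
 * inlines every known callee whose graph obstack occupies less than 50 bytes,
 * plus all callees marked irg_inline_forced. */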
1351
1352 /**
1353  * Environment for inlining irgs.
1354  */
1355 typedef struct {
1356         list_head calls;             /**< List of all call nodes in this graph. */
1357         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1358         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1359         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1360         unsigned  n_nodes_orig;      /**< for statistics */
1361         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1362         unsigned  n_call_nodes_orig; /**< for statistics */
1363         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
1364         unsigned  n_callers_orig;    /**< for statistics */
1365         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1366         unsigned  local_vars:1;      /**< Set, if an inlined function got the address of a local variable. */
1367         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1368 } inline_irg_env;
1369
1370 /**
1371  * Allocate a new environment for inlining.
1372  */
1373 static inline_irg_env *alloc_inline_irg_env(void) {
1374         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1375         INIT_LIST_HEAD(&env->calls);
1376         env->local_weights     = NULL;
1377         env->n_nodes           = -2; /* do not count Start, End */
1378         env->n_blocks          = -2; /* do not count Start, End Block */
1379         env->n_nodes_orig      = -2; /* do not count Start, End */
1380         env->n_call_nodes      = 0;
1381         env->n_call_nodes_orig = 0;
1382         env->n_callers         = 0;
1383         env->n_callers_orig    = 0;
1384         env->got_inline        = 0;
1385         env->local_vars        = 0;
1386         env->recursive         = 0;
1387         return env;
1388 }
1389
1390 typedef struct walker_env {
1391         inline_irg_env *x;     /**< the inline environment */
1392         char ignore_runtime;   /**< the ignore runtime flag */
1393         char ignore_callers;   /**< if set, do not update the callers' data */
1394 } wenv_t;
1395
1396 /**
1397  * post-walker: collect all calls in the inline-environment
1398  * of a graph and sum some statistics.
1399  */
1400 static void collect_calls2(ir_node *call, void *ctx) {
1401         wenv_t         *env = ctx;
1402         inline_irg_env *x = env->x;
1403         ir_opcode      code = get_irn_opcode(call);
1404         ir_graph       *callee;
1405         call_entry     *entry;
1406
1407         /* count meaningful nodes in irg */
1408         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1409                 if (code != iro_Block) {
1410                         ++x->n_nodes;
1411                         ++x->n_nodes_orig;
1412                 } else {
1413                         ++x->n_blocks;
1414                 }
1415         }
1416
1417         if (code != iro_Call) return;
1418
1419         /* check, if it's a runtime call */
1420         if (env->ignore_runtime) {
1421                 ir_node *symc = get_Call_ptr(call);
1422
1423                 if (is_Global(symc)) {
1424                         ir_entity *ent = get_Global_entity(symc);
1425
1426                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1427                                 return;
1428                 }
1429         }
1430
1431         /* collect all call nodes */
1432         ++x->n_call_nodes;
1433         ++x->n_call_nodes_orig;
1434
1435         callee = get_call_called_irg(call);
1436         if (callee != NULL) {
1437                 if (! env->ignore_callers) {
1438                         inline_irg_env *callee_env = get_irg_link(callee);
1439                         /* count all static callers */
1440                         ++callee_env->n_callers;
1441                         ++callee_env->n_callers_orig;
1442                 }
1443                 if (callee == current_ir_graph)
1444                         x->recursive = 1;
1445
1446                 /* link it into the list of possibly inlinable entries */
1447                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1448                 entry->call       = call;
1449                 entry->callee     = callee;
1450                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1451
1452                 list_add_tail(&entry->list, &x->calls);
1453         }
1454 }
1455
1456 /**
1457  * Returns TRUE if the number of call nodes is 0 in the irg's environment,
1458  * hence this irg is a leaf that calls no other methods.
1459  */
1460 INLINE static int is_leave(ir_graph *irg) {
1461         inline_irg_env *env = get_irg_link(irg);
1462         return env->n_call_nodes == 0;
1463 }
1464
1465 /**
1466  * Returns TRUE if the number of nodes in the callee is
1467  * smaller than size in the irg's environment.
1468  */
1469 INLINE static int is_smaller(ir_graph *callee, unsigned size) {
1470         inline_irg_env *env = get_irg_link(callee);
1471         return env->n_nodes < size;
1472 }
1473
1474 /**
1475  * Duplicate a call entry.
1476  *
1477  * @param entry     the original entry to duplicate
1478  * @param new_call  the new call node
1479  * @param loop_depth_delta
1480  *                  delta value for the loop depth
1481  */
1482 static call_entry *duplicate_call_entry(const call_entry *entry,
1483                                         ir_node *new_call, int loop_depth_delta) {
1484         call_entry *nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1485         nentry->call       = new_call;
1486         nentry->callee     = entry->callee;
1487         nentry->benefice   = entry->benefice;
1488         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1489         nentry->local_adr  = entry->local_adr;
1490         nentry->all_const  = entry->all_const;
1491
1492         return nentry;
1493 }
1494
1495 /**
1496  * Append all call nodes of the source environment to the call list of the destination
1497  * environment.
1498  *
1499  * @param dst         destination environment
1500  * @param src         source environment
1501  * @param loop_depth  the loop depth of the call that is replaced by the src list
1502  */
1503 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth) {
1504         call_entry *entry, *nentry;
1505
1506         /* Note that the src list points to Call nodes in the inlined graph, but
1507            we need Call nodes in our graph. Luckily the inliner leaves this information
1508            in the link field. */
1509         list_for_each_entry(call_entry, entry, &src->calls, list) {
1510                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1511                 list_add_tail(&nentry->list, &dst->calls);
1512         }
1513         dst->n_call_nodes += src->n_call_nodes;
1514         dst->n_nodes      += src->n_nodes;
1515 }
1516
1517 /*
1518  * Inlines small leave methods at call sites where the called address comes
1519  * from a SymConst node that references the entity representing the called
1520  * method.
1521  * The size argument is a rough measure for the code size of the method:
1522  * Methods where the obstack containing the firm graph is smaller than
1523  * size are inlined.
1524  */
1525 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1526                             unsigned size, int ignore_runtime)
1527 {
1528         inline_irg_env   *env;
1529         ir_graph         *irg;
1530         int              i, n_irgs;
1531         ir_graph         *rem;
1532         int              did_inline;
1533         wenv_t           wenv;
1534         call_entry       *entry, *next;
1535         const call_entry *centry;
1536         pmap             *copied_graphs;
1537         pmap_entry       *pm_entry;
1538
1539         rem = current_ir_graph;
1540         obstack_init(&temp_obst);
1541
1542         /* a map for the copied graphs, used to inline recursive calls */
1543         copied_graphs = pmap_create();
1544
1545         /* extend all irgs by a temporary data structure for inlining. */
1546         n_irgs = get_irp_n_irgs();
1547         for (i = 0; i < n_irgs; ++i)
1548                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1549
1550         /* Precompute information in temporary data structure. */
1551         wenv.ignore_runtime = ignore_runtime;
1552         wenv.ignore_callers = 0;
1553         for (i = 0; i < n_irgs; ++i) {
1554                 ir_graph *irg = get_irp_irg(i);
1555
1556                 assert(get_irg_phase_state(irg) != phase_building);
1557                 free_callee_info(irg);
1558
1559                 assure_cf_loop(irg);
1560                 wenv.x = get_irg_link(irg);
1561                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1562         }
1563
1564         /* -- and now inline. -- */
1565
1566         /* Inline leaves recursively -- we might construct new leaves. */
1567         do {
1568                 did_inline = 0;
1569
1570                 for (i = 0; i < n_irgs; ++i) {
1571                         ir_node *call;
1572                         int phiproj_computed = 0;
1573
1574                         current_ir_graph = get_irp_irg(i);
1575                         env              = get_irg_link(current_ir_graph);
1576
1577                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1578                                 ir_graph            *callee;
1579                                 irg_inline_property  prop;
1580
1581                                 if (env->n_nodes > maxsize)
1582                                         break;
1583
1584                                 call   = entry->call;
1585                                 callee = entry->callee;
1586
1587                                 prop = get_irg_inline_property(callee);
1588                                 if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1589                                         /* do not inline forbidden / weak graphs */
1590                                         continue;
1591                                 }
1592
1593                                 if (is_leave(callee) && (
1594                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1595                                         if (!phiproj_computed) {
1596                                                 phiproj_computed = 1;
1597                                                 collect_phiprojs(current_ir_graph);
1598                                         }
1599                                         did_inline = inline_method(call, callee);
1600
1601                                         if (did_inline) {
1602                                                 inline_irg_env *callee_env = get_irg_link(callee);
1603
1604                                                 /* was inlined, must be recomputed */
1605                                                 phiproj_computed = 0;
1606
1607                                                 /* Do some statistics */
1608                                                 env->got_inline = 1;
1609                                                 --env->n_call_nodes;
1610                                                 env->n_nodes += callee_env->n_nodes;
1611                                                 --callee_env->n_callers;
1612
1613                                                 /* remove this call from the list */
1614                                                 list_del(&entry->list);
1615                                                 continue;
1616                                         }
1617                                 }
1618                         }
1619                 }
1620         } while (did_inline);
1621
1622         /* inline other small functions. */
1623         for (i = 0; i < n_irgs; ++i) {
1624                 ir_node *call;
1625                 int phiproj_computed = 0;
1626
1627                 current_ir_graph = get_irp_irg(i);
1628                 env              = get_irg_link(current_ir_graph);
1629
1630                 /* note that the list of possible calls is updated during the process */
1631                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1632                         irg_inline_property prop;
1633                         ir_graph            *callee;
1634                         pmap_entry          *e;
1635
1636                         call   = entry->call;
1637                         callee = entry->callee;
1638
1639                         prop = get_irg_inline_property(callee);
1640                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1641                                 /* do not inline forbidden / weak graphs */
1642                                 continue;
1643                         }
1644
1645                         e = pmap_find(copied_graphs, callee);
1646                         if (e != NULL) {
1647                                 /*
1648                                  * Remap callee if we have a copy.
1649                                  * FIXME: Should we do this only for recursive Calls ?
1650                                  */
1651                                 callee = e->value;
1652                         }
1653
1654                         if (prop >= irg_inline_forced ||
1655                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1656                                 if (current_ir_graph == callee) {
1657                                         /*
1658                                          * Recursive call: we cannot directly inline because we cannot walk
1659                                          * the graph and change it. So we have to make a copy of the graph
1660                                          * first.
1661                                          */
1662
1663                                         inline_irg_env *callee_env;
1664                                         ir_graph       *copy;
1665
1666                                         /*
1667                                          * No copy yet, create one.
1668                                          * Note that recursive methods are never leaves, so it is sufficient
1669                                          * to test this condition here.
1670                                          */
1671                                         copy = create_irg_copy(callee);
1672
1673                                         /* create_irg_copy() destroys the Proj links, recompute them */
1674                                         phiproj_computed = 0;
1675
1676                                         /* allocate new environment */
1677                                         callee_env = alloc_inline_irg_env();
1678                                         set_irg_link(copy, callee_env);
1679
1680                                         assure_cf_loop(copy);
1681                                         wenv.x              = callee_env;
1682                                         wenv.ignore_callers = 1;
1683                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1684
1685                                         /*
1686                                          * Enter the entity of the original graph. This is needed
1687                                          * for inline_method(). However, note that ent->irg still points
1688                                          * to callee, NOT to copy.
1689                                          */
1690                                         set_irg_entity(copy, get_irg_entity(callee));
1691
1692                                         pmap_insert(copied_graphs, callee, copy);
1693                                         callee = copy;
1694
1695                                         /* we have only one caller: the original graph */
1696                                         callee_env->n_callers      = 1;
1697                                         callee_env->n_callers_orig = 1;
1698                                 }
1699                                 if (! phiproj_computed) {
1700                                         phiproj_computed = 1;
1701                                         collect_phiprojs(current_ir_graph);
1702                                 }
1703                                 did_inline = inline_method(call, callee);
1704                                 if (did_inline) {
1705                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1706
1707                                         /* was inlined, must be recomputed */
1708                                         phiproj_computed = 0;
1709
1710                                         /* callee was inlined. Append its call list. */
1711                                         env->got_inline = 1;
1712                                         --env->n_call_nodes;
1713                                         append_call_list(env, callee_env, entry->loop_depth);
1714                                         --callee_env->n_callers;
1715
1716                                         /* after we have inlined callee, all called methods inside callee
1717                                            are now called once more */
1718                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1719                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1720                                                 ++penv->n_callers;
1721                                         }
1722
1723                                         /* remove this call from the list */
1724                                         list_del(&entry->list);
1725                                         continue;
1726                                 }
1727                         }
1728                 }
1729         }
1730
1731         for (i = 0; i < n_irgs; ++i) {
1732                 irg = get_irp_irg(i);
1733                 env = get_irg_link(irg);
1734
1735                 if (env->got_inline) {
1736                         optimize_graph_df(irg);
1737                         optimize_cf(irg);
1738                 }
1739                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1740                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1741                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1742                         env->n_callers_orig, env->n_callers,
1743                         get_entity_name(get_irg_entity(irg))));
1744                 }
1745         }
1746
1747         /* kill the copied graphs: we don't need them anymore */
1748         foreach_pmap(copied_graphs, pm_entry) {
1749                 ir_graph *copy = pm_entry->value;
1750
1751                 /* reset the entity, otherwise it will be deleted in the next step ... */
1752                 set_irg_entity(copy, NULL);
1753                 free_ir_graph(copy);
1754         }
1755         pmap_destroy(copied_graphs);
1756
1757         obstack_free(&temp_obst, NULL);
1758         current_ir_graph = rem;
1759 }
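
/* Illustrative invocation (all numbers are made-up examples, not tuned
 * defaults):
 *
 *     inline_leave_functions(750, 16, 10, 0);
 *
 * i.e. never grow a caller beyond roughly 750 nodes, inline leaves smaller
 * than 16 nodes, other callees smaller than 10 nodes, and do not ignore
 * runtime calls. */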
1760
1761 /**
1762  * Calculate the parameter weights for transmitting the address of a local variable.
1763  */
1764 static unsigned calc_method_local_weight(ir_node *arg) {
1765         int      i, j, k;
1766         unsigned v, weight = 0;
1767
1768         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1769                 ir_node *succ = get_irn_out(arg, i);
1770
1771                 switch (get_irn_opcode(succ)) {
1772                 case iro_Load:
1773                 case iro_Store:
1774                         /* Loads and Stores can be removed */
1775                         weight += 3;
1776                         break;
1777                 case iro_Sel:
1778                         /* check if all args are constant */
1779                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1780                                 ir_node *idx = get_Sel_index(succ, j);
1781                                 if (! is_Const(idx))
1782                                         return 0;
1783                         }
1784                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1785                            some unsupported node. */
1786                         v = calc_method_local_weight(succ);
1787                         if (v == 0)
1788                                 return 0;
1789                         /* we can kill one Sel with constant indexes, this is cheap */
1790                         weight += v + 1;
1791                         break;
1792                 case iro_Id:
1793                         /* when looking backward we might find Id nodes */
1794                         weight += calc_method_local_weight(succ);
1795                         break;
1796                 case iro_Tuple:
1797                         /* unoptimized tuple */
1798                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1799                                 ir_node *pred = get_Tuple_pred(succ, j);
1800                                 if (pred == arg) {
1801                                         /* look for Proj(j) */
1802                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1803                                                 ir_node *succ_succ = get_irn_out(succ, k);
1804                                                 if (is_Proj(succ_succ)) {
1805                                                         if (get_Proj_proj(succ_succ) == j) {
1806                                                                 /* found */
1807                                                                 weight += calc_method_local_weight(succ_succ);
1808                                                         }
1809                                                 } else {
1810                                                         /* this should NOT happen */
1811                                                         return 0;
1812                                                 }
1813                                         }
1814                                 }
1815                         }
1816                         break;
1817                 default:
1818                         /* any other node: unsupported yet or bad. */
1819                         return 0;
1820                 }
1821         }
1822         return weight;
1823 }
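
/* Worked example of the weights above: a parameter whose address is used by
 * exactly one Load and one Store scores 3 + 3 = 6.  A Sel with constant
 * indexes whose only user is a Load adds (3) + 1 = 4 more.  Any unsupported
 * user makes the whole function return 0. */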
1824
1825 /**
1826  * Calculate the parameter weights for transmitting the address of a local variable.
1827  */
1828 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1829         ir_entity *ent = get_irg_entity(irg);
1830         ir_type  *mtp;
1831         int      nparams, i, proj_nr;
1832         ir_node  *irg_args, *arg;
1833
1834         mtp      = get_entity_type(ent);
1835         nparams  = get_method_n_params(mtp);
1836
1837         /* allocate a new array; a non-NULL pointer also serves as the 'already analysed' flag */
1838         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1839
1840         /* If the method has no parameters, we have nothing to do. */
1841         if (nparams <= 0)
1842                 return;
1843
1844         assure_irg_outs(irg);
1845         irg_args = get_irg_args(irg);
1846         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1847                 arg     = get_irn_out(irg_args, i);
1848                 proj_nr = get_Proj_proj(arg);
1849                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1850         }
1851 }
1852
1853 /**
1854  * Calculate the benefice for transmitting a local variable address.
1855  * After inlining, the local variable might be transformed into a
1856  * SSA variable by scalar_replacement().
1857  */
1858 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1859         inline_irg_env *env = get_irg_link(callee);
1860
1861         if (env->local_weights != NULL) {
1862                 if (pos < ARR_LEN(env->local_weights))
1863                         return env->local_weights[pos];
1864                 return 0;
1865         }
1866
1867         analyze_irg_local_weights(env, callee);
1868
1869         if (pos < ARR_LEN(env->local_weights))
1870                 return env->local_weights[pos];
1871         return 0;
1872 }
1873
1874 /**
1875  * Calculate a benefice value for inlining the given call.
1876  *
1877  * @param call       the call node we have to inspect
1878  * @param callee     the called graph
1879  */
1880 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1881 {
1882         ir_node   *call = entry->call;
1883         ir_entity *ent  = get_irg_entity(callee);
1884         ir_node   *frame_ptr;
1885         ir_type   *mtp;
1886         int       weight = 0;
1887         int       i, n_params, all_const;
1888         unsigned  cc, v;
1889         irg_inline_property prop;
1890
1891         inline_irg_env *callee_env;
1892
1893         prop = get_irg_inline_property(callee);
1894         if (prop == irg_inline_forbidden) {
1895                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1896                     call, callee));
1897                 return entry->benefice = INT_MIN;
1898         }
1899
1900         if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
1901                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1902                     call, callee));
1903                 return entry->benefice = INT_MIN;
1904         }
1905
1906         /* costs for every passed parameter */
1907         n_params = get_Call_n_params(call);
1908         mtp      = get_entity_type(ent);
1909         cc       = get_method_calling_convention(mtp);
1910         if (cc & cc_reg_param) {
1911                 /* register parameter, smaller costs for register parameters */
1912                 int max_regs = cc & ~cc_bits;
1913
1914                 if (max_regs < n_params)
1915                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1916                 else
1917                         weight += n_params * 2;
1918         } else {
1919                 /* parameters are passed on the stack */
1920                 weight += 5 * n_params;
1921         }
1922
1923         /* constant parameters improve the benefice */
1924         frame_ptr = get_irg_frame(current_ir_graph);
1925         all_const = 1;
1926         for (i = 0; i < n_params; ++i) {
1927                 ir_node *param = get_Call_param(call, i);
1928
1929                 if (is_Const(param)) {
1930                         weight += get_method_param_weight(ent, i);
1931                 } else {
1932                         all_const = 0;
1933                         if (is_SymConst(param))
1934                                 weight += get_method_param_weight(ent, i);
1935                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1936                                 /*
1937                                  * An address of a local variable is transmitted. After
1938                                  * inlining, scalar_replacement might be able to remove the
1939                                  * local variable, so honor this.
1940                                  */
1941                                 v = get_method_local_adress_weight(callee, i);
1942                                 weight += v;
1943                                 if (v > 0)
1944                                         entry->local_adr = 1;
1945                         }
1946                 }
1947         }
1948         entry->all_const = all_const;
1949
1950         callee_env = get_irg_link(callee);
1951         if (callee_env->n_callers == 1 &&
1952             callee != current_ir_graph &&
1953             get_entity_visibility(ent) == visibility_local) {
1954                 weight += 700;
1955         }
1956
1957         /* give a bonus for functions with one block */
1958         if (callee_env->n_blocks == 1)
1959                 weight = weight * 3 / 2;
1960
1961         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1962         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1963                 weight += 2000;
1964
1965         /* and finally for leaves: they do not increase the register pressure
1966            because of callee-save registers */
1967         if (callee_env->n_call_nodes == 0)
1968                 weight += 400;
1969
1970         /* it's important to inline inner loops first */
1971         if (entry->loop_depth > 30)
1972                 weight += 30 * 1024;
1973         else
1974                 weight += entry->loop_depth * 1024;
1975
1976         /*
1977          * All arguments constant is probably a good sign, give an extra bonus
1978          */
1979         if (all_const)
1980                 weight += 1024;
1981
1982         return entry->benefice = weight;
1983 }
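
/* Rough example of the scoring (assuming get_method_param_weight() adds
 * nothing and the single-caller bonus does not apply): a call with two
 * constant arguments passed on the stack, outside any loop, to a
 * single-block, non-recursive leaf of fewer than 30 nodes scores
 * 2 * 5 = 10, then 10 * 3 / 2 = 15 for the single block, + 2000 (small,
 * non-recursive) + 400 (leaf) + 1024 (all arguments constant) = 3439. */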
1984
1985 static ir_graph **irgs;
1986 static int      last_irg;
1987
1988 /**
1989  * Callgraph walker, collect all visited graphs.
1990  */
1991 static void callgraph_walker(ir_graph *irg, void *data) {
1992         (void) data;
1993         irgs[last_irg++] = irg;
1994 }
1995
1996 /**
1997  * Creates an inline order for all graphs.
1998  *
1999  * @return the list of graphs.
2000  */
2001 static ir_graph **create_irg_list(void) {
2002         ir_entity **free_methods;
2003         int       arr_len;
2004         int       n_irgs = get_irp_n_irgs();
2005
2006         cgana(&arr_len, &free_methods);
2007         xfree(free_methods);
2008
2009         compute_callgraph();
2010
2011         last_irg = 0;
2012         irgs     = xmalloc(n_irgs * sizeof(*irgs));
2013         memset(irgs, 0, n_irgs * sizeof(*irgs));
2014
2015         callgraph_walk(NULL, callgraph_walker, NULL);
2016         assert(n_irgs == last_irg);
2017
2018         return irgs;
2019 }
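
/* The graphs are collected by a post-order walk of the call graph, so,
 * ignoring cycles, callees tend to appear before their callers and are
 * processed first by inline_functions() below. */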
2020
2021 /**
2022  * Push a call onto the priority queue if its
2023  * benefice is big enough.
2024  *
2025  * @param pqueue   the priority queue of calls
2026  * @param call     the call entry
2027  * @param inline_threshold
2028  *                 the threshold value
2029  */
2030 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2031                             int inline_threshold)
2032 {
2033         int                 benefice;
2034         ir_graph            *callee  = call->callee;
2035         irg_inline_property prop     = get_irg_inline_property(callee);
2036
2037         if (prop & irg_inline_forced) {
2038                 /* give them a big benefice, so forced calls are inlined first */
2039                 benefice = 100000 + call->loop_depth;
2040                 call->benefice = benefice;
2041                 DB((dbg, LEVEL_2, "In %+F Call %+F to %+F is forced\n",
2042                         get_irn_irg(call->call), call->call, callee));
2043         } else {
2044                 benefice = calc_inline_benefice(call, callee);
2045                 DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2046                         get_irn_irg(call->call), call->call, callee, benefice));
2047         }
2048
2049         if (benefice < inline_threshold && !(prop & irg_inline_forced))
2050                 return;
2051
2052         pqueue_put(pqueue, call, benefice);
2053 }
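
/* Note that calls to graphs marked irg_inline_forced bypass the threshold
 * check: they are always queued, with a benefice large enough to be popped
 * before ordinary candidates. */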
2054
2055 /**
2056  * Try to inline calls into a graph.
2057  *
2058  * @param irg      the graph into which we inline
2059  * @param maxsize  do NOT inline if the size of irg gets
2060  *                 bigger than this amount
2061  * @param inline_threshold
2062  *                 threshold value for inline decision
2063  * @param copied_graphs
2064  *                 map containing copies of recursive graphs
2065  */
2066 static void inline_into(ir_graph *irg, unsigned maxsize,
2067                         int inline_threshold, pmap *copied_graphs)
2068 {
2069         int            phiproj_computed = 0;
2070         inline_irg_env *env = get_irg_link(irg);
2071         call_entry     *curr_call;
2072         wenv_t         wenv;
2073         pqueue_t       *pqueue;
2074
2075         if (env->n_call_nodes == 0)
2076                 return;
2077
2078         if (env->n_nodes > maxsize) {
2079                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2080                 return;
2081         }
2082
2083         current_ir_graph = irg;
2084
2085         /* put all calls of the graph into the priority queue */
2086         pqueue = new_pqueue();
2087
2088         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2089                 assert(is_Call(curr_call->call));
2090                 maybe_push_call(pqueue, curr_call, inline_threshold);
2091         }
2092
2093         /* note that the list of possible calls is updated during the process */
2094         while (!pqueue_empty(pqueue)) {
2095                 int                 did_inline;
2096                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2097                 ir_graph            *callee     = curr_call->callee;
2098                 ir_node             *call_node  = curr_call->call;
2099                 inline_irg_env      *callee_env = get_irg_link(callee);
2100                 int                 loop_depth;
2101                 const call_entry    *centry;
2102                 pmap_entry          *e;
2103
2104                 /* we need a hard limit here, else it would be possible to inline
2105                  * recursive functions forever. */
2106                 if (env->n_nodes + callee_env->n_nodes > maxsize) {
2107                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2108                                                 env->n_nodes, callee, callee_env->n_nodes));
2109                         continue;
2110                 }
2111
2112                 e = pmap_find(copied_graphs, callee);
2113                 if (e != NULL) {
2114                         int benefice = curr_call->benefice;
2115                         /*
2116                          * Reduce the weight for a recursive function if not all arguments are constant;
2117                          * inlining recursive functions is rarely beneficial.
2118                          */
2119                         if (!curr_call->all_const)
2120                                 benefice -= 2000;
2121                         if (benefice < inline_threshold)
2122                                 continue;
2123
2124                         /*
2125                          * Remap callee if we have a copy.
2126                          */
2127                         callee     = e->value;
2128                         callee_env = get_irg_link(callee);
2129                 }
2130
2131                 if (current_ir_graph == callee) {
2132                         /*
2133                          * Recursive call: we cannot directly inline because we cannot
2134                          * walk the graph and change it. So we have to make a copy of
2135                          * the graph first.
2136                          */
2137                         int benefice = curr_call->benefice;
2138                         ir_graph *copy;
2139
2140                         /*
2141                          * Reduce the weight for a recursive function if not all arguments are constant;
2142                          * inlining recursive functions is rarely beneficial.
2143                          */
2144                         if (!curr_call->all_const)
2145                                 benefice -= 2000;
2146                         if (benefice < inline_threshold)
2147                                 continue;
2148
2149                         /*
2150                          * No copy yet, create one.
2151                          * Note that recursive methods are never leaves, so it is
2152                          * sufficient to test this condition here.
2153                          */
2154                         copy = create_irg_copy(callee);
2155
2156                         /* create_irg_copy() destroys the Proj links, recompute them */
2157                         phiproj_computed = 0;
2158
2159                         /* allocate a new environment */
2160                         callee_env = alloc_inline_irg_env();
2161                         set_irg_link(copy, callee_env);
2162
2163                         assure_cf_loop(copy);
2164                         wenv.x              = callee_env;
2165                         wenv.ignore_callers = 1;
2166                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2167
2168                         /*
2169                          * Enter the entity of the original graph. This is needed
2170                          * for inline_method(). However, note that ent->irg still points
2171                          * to callee, NOT to copy.
2172                          */
2173                         set_irg_entity(copy, get_irg_entity(callee));
2174
2175                         pmap_insert(copied_graphs, callee, copy);
2176                         callee = copy;
2177
2178                         /* we have only one caller: the original graph */
2179                         callee_env->n_callers      = 1;
2180                         callee_env->n_callers_orig = 1;
2181                 }
2182                 if (! phiproj_computed) {
2183                         phiproj_computed = 1;
2184                         collect_phiprojs(current_ir_graph);
2185                 }
2186                 did_inline = inline_method(call_node, callee);
2187                 if (!did_inline)
2188                         continue;
2189
2190                 /* got inlined, must be recomputed */
2191                 phiproj_computed = 0;
2192
2193                 /* remove it from the caller list */
2194                 list_del(&curr_call->list);
2195
2196                 /* callee was inlined. Append its call list. */
2197                 env->got_inline = 1;
2198                 if (curr_call->local_adr)
2199                         env->local_vars = 1;
2200                 --env->n_call_nodes;
2201
2202                 /* we just generate a bunch of new calls */
2203                 loop_depth = curr_call->loop_depth;
2204                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2205                         inline_irg_env *penv = get_irg_link(centry->callee);
2206                         ir_node        *new_call;
2207                         call_entry     *new_entry;
2208
2209                         /* after we have inlined callee, all called methods inside
2210                          * callee are now called once more */
2211                         ++penv->n_callers;
2212
2213                         /* Note that the src list points to Call nodes in the inlined graph,
2214                          * but we need Call nodes in our graph. Luckily the inliner leaves
2215                          * this information in the link field. */
2216                         new_call = get_irn_link(centry->call);
2217                         assert(is_Call(new_call));
2218
2219                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2220                         list_add_tail(&new_entry->list, &env->calls);
2221                         maybe_push_call(pqueue, new_entry, inline_threshold);
2222                 }
2223
2224                 env->n_call_nodes += callee_env->n_call_nodes;
2225                 env->n_nodes += callee_env->n_nodes;
2226                 --callee_env->n_callers;
2227         }
2228
2229         del_pqueue(pqueue);
2230 }
2231
2232 /**
2233  * Heuristic inliner. Calculates a benefice value for every call and inlines
2234  * those calls with a value higher than the threshold.
2235  */
2236 void inline_functions(unsigned maxsize, int inline_threshold) {
2237         inline_irg_env   *env;
2238         int              i, n_irgs;
2239         ir_graph         *rem;
2240         wenv_t           wenv;
2241         pmap             *copied_graphs;
2242         pmap_entry       *pm_entry;
2243         ir_graph         **irgs;
2244
2245         rem = current_ir_graph;
2246         obstack_init(&temp_obst);
2247
2248         irgs = create_irg_list();
2249
2250         /* a map for the copied graphs, used to inline recursive calls */
2251         copied_graphs = pmap_create();
2252
2253         /* extend all irgs by a temporary data structure for inlining. */
2254         n_irgs = get_irp_n_irgs();
2255         for (i = 0; i < n_irgs; ++i)
2256                 set_irg_link(irgs[i], alloc_inline_irg_env());
2257
2258         /* Precompute information in temporary data structure. */
2259         wenv.ignore_runtime = 0;
2260         wenv.ignore_callers = 0;
2261         for (i = 0; i < n_irgs; ++i) {
2262                 ir_graph *irg = irgs[i];
2263
2264                 free_callee_info(irg);
2265
2266                 wenv.x = get_irg_link(irg);
2267                 assure_cf_loop(irg);
2268                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2269         }
2270
2271         /* -- and now inline. -- */
2272         for (i = 0; i < n_irgs; ++i) {
2273                 ir_graph *irg = irgs[i];
2274
2275                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2276         }
2277
2278         for (i = 0; i < n_irgs; ++i) {
2279                 ir_graph *irg = irgs[i];
2280
2281                 env = get_irg_link(irg);
2282                 if (env->got_inline) {
2283                         /* this irg got calls inlined: optimize it */
2284
2285                         if (0) {
2286                                 /* scalar replacement does not work well with Tuple nodes, so optimize them away */
2287                                 optimize_graph_df(irg);
2288
2289                                 if (env->local_vars) {
2290                                         if (scalar_replacement_opt(irg)) {
2291                                                 optimize_graph_df(irg);
2292                                         }
2293                                 }
2294                                 optimize_cf(irg);
2295                         } else {
2296                                 if (env->local_vars) {
2297                                         scalar_replacement_opt(irg);
2298                                 }
2299                                 combo(irg);
2300                         }
2301                 }
2302                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2303                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2304                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2305                         env->n_callers_orig, env->n_callers,
2306                         get_entity_name(get_irg_entity(irg))));
2307                 }
2308         }
2309
2310         /* kill the copied graphs: we don't need them anymore */
2311         foreach_pmap(copied_graphs, pm_entry) {
2312                 ir_graph *copy = pm_entry->value;
2313
2314                 /* reset the entity, otherwise it will be deleted in the next step ... */
2315                 set_irg_entity(copy, NULL);
2316                 free_ir_graph(copy);
2317         }
2318         pmap_destroy(copied_graphs);
2319
2320         xfree(irgs);
2321
2322         obstack_free(&temp_obst, NULL);
2323         current_ir_graph = rem;
2324 }
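
/* Illustrative invocation (the numbers are made-up examples, not tuned
 * defaults; the program graphs are assumed to be fully constructed):
 *
 *     inline_functions(750, 0);
 *
 * i.e. never grow a graph beyond roughly 750 nodes and inline every call
 * whose computed benefice reaches the threshold of 0. */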
2325
2326 void firm_init_inline(void) {
2327         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2328 }