remove old+unused code
ir/opt/opt_inline.c
/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
	set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
	assert(irn_visited(old_node));
	return (ir_node*) get_irn_link(old_node);
}

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy a node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it's a local one.  env must be the ir_graph the node is
 * copied into.  The new entities must be in the link field of the old
 * entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
	ir_graph *new_irg  = (ir_graph*) env;
	ir_node  *new_node = irn_copy_into_irg(node, new_irg);

	set_new_node(node, new_node);
	if (is_Sel(node)) {
		ir_graph  *old_irg        = get_irn_irg(node);
		ir_type   *old_frame_type = get_irg_frame_type(old_irg);
		ir_entity *old_entity     = get_Sel_entity(node);
		assert(is_Sel(new_node));
		/* use copied entities from the new frame */
		if (get_entity_owner(old_entity) == old_frame_type) {
			ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
			assert(new_entity != NULL);
			set_Sel_entity(new_node, new_entity);
		}
	} else if (is_Block(new_node)) {
		new_node->attr.block.irg.irg = new_irg;
	}
}

static void set_preds_inline(ir_node *node, void *env)
{
	ir_node *new_node;

	irn_rewire_inputs(node);

	/* move constants into start block */
	new_node = get_new_node(node);
	if (is_irn_constlike(new_node)) {
		ir_graph *new_irg     = (ir_graph *) env;
		ir_node  *start_block = get_irg_start_block(new_irg);
		set_nodes_block(new_node, start_block);
	}
}

/**
 * Walker: checks for nodes that prevent inlining, i.e. accesses to
 * value-parameter entities on the frame and stack Allocs.
 */
static void find_addr(ir_node *node, void *env)
{
	bool *allow_inline = (bool*)env;

	if (is_Sel(node)) {
		ir_graph *irg = current_ir_graph;
		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
			/* access to frame */
			ir_entity *ent = get_Sel_entity(node);
			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
				/* access to value_type */
				*allow_inline = false;
			}
		}
	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
		/* From GCC:
		 * Refuse to inline an alloca call unless the user explicitly forced
		 * it, as this may change the program's memory overhead drastically
		 * when the function using alloca is called in a loop.  In GCC present
		 * in SPEC2000, inlining into schedule_block caused it to require 2GB
		 * of RAM instead of 256MB.
		 *
		 * Sadly this is true for our implementation as well.
		 * Moreover, we cannot differentiate between alloca() and VLAs yet, so
		 * this disables inlining of functions using VLAs (which are completely
		 * safe).
		 *
		 * Two solutions:
		 * - add a flag to the Alloc node for "real" alloca() calls
		 * - add a new Stack-Restore node at the end of a function using
		 *   alloca()
		 */
		*allow_inline = false;
	}
}
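
/* Illustration (a sketch, not part of the original sources): find_addr()
 * refuses inlining for callees that allocate on the stack.  A hypothetical
 * callee like
 *
 *     void callee(int n) {
 *         char buf[n];   // VLA -> lowered to a stack Alloc
 *         use(buf);      // use() is a made-up helper
 *     }
 *
 * is rejected, because the VLA is represented as a stack Alloc. */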

/**
 * Check whether we can inline a given call.
 * Currently, we cannot inline two kinds of calls:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * We check these conditions here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
	ir_entity          *called      = get_irg_entity(called_graph);
	ir_type            *called_type = get_entity_type(called);
	ir_type            *call_type   = get_Call_type(call);
	size_t              n_params    = get_method_n_params(called_type);
	size_t              n_arguments = get_method_n_params(call_type);
	size_t              n_res       = get_method_n_ress(called_type);
	irg_inline_property prop        = get_irg_inline_property(called_graph);
	size_t              i;
	bool                res;

	if (prop == irg_inline_forbidden)
		return false;

	if (n_arguments != n_params) {
		/* this is a bad feature of C: without a prototype, we can
		 * call a function with fewer parameters than needed. Currently
		 * we don't support this, although we could use Unknown nodes
		 * for the missing arguments. */
		return false;
	}
	if (n_res != get_method_n_ress(call_type)) {
		return false;
	}

	/* Argh, compiling C has some bad consequences:
	 * argument and parameter types may differ, and it is implementation
	 * defined what happens in that case.
	 * We support inlining if the bit size of the types matches AND
	 * the same arithmetic is used. */
	for (i = 0; i < n_params; ++i) {
		ir_type *param_tp = get_method_param_type(called_type, i);
		ir_type *arg_tp   = get_method_param_type(call_type, i);

		if (param_tp != arg_tp) {
			ir_mode *pmode = get_type_mode(param_tp);
			ir_mode *amode = get_type_mode(arg_tp);

			if (pmode == NULL || amode == NULL)
				return false;
			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
				return false;
			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
				return false;
			/* otherwise we can simply "reinterpret" the bits */
		}
	}
	for (i = 0; i < n_res; ++i) {
		ir_type *decl_res_tp = get_method_res_type(called_type, i);
		ir_type *used_res_tp = get_method_res_type(call_type, i);

		if (decl_res_tp != used_res_tp) {
			ir_mode *decl_mode = get_type_mode(decl_res_tp);
			ir_mode *used_mode = get_type_mode(used_res_tp);
			if (decl_mode == NULL || used_mode == NULL)
				return false;
			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
				return false;
			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
				return false;
			/* otherwise we can "reinterpret" the bits */
		}
	}

	/* check parameters for compound arguments */
	for (i = 0; i < n_params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type))
			return false;
	}

	/* check results for compound arguments */
	for (i = 0; i < n_res; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type))
			return false;
	}

	res = true;
	irg_walk_graph(called_graph, find_addr, NULL, &res);

	return res;
}
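
/* Illustration (a sketch, not from the original sources): the "reinterpret
 * the bits" rule above means a type mismatch alone does not block inlining.
 * Passing an unsigned int where the callee declares int is accepted when
 * both modes have the same bit size and the same arithmetic:
 *
 *     int callee(int x);
 *     unsigned u = 7;
 *     callee(u);   // modes differ in sign only: same size, same
 *                  // arithmetic -> can_inline() does not reject this
 */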

enum exc_mode {
	exc_handler,    /**< There is a handler. */
	exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
	ir_type *from_frame = get_irg_frame_type(from);
	ir_type *to_frame   = get_irg_frame_type(to);
	size_t   n_members  = get_class_n_members(from_frame);
	size_t   i;
	assert(from_frame != to_frame);

	for (i = 0; i < n_members; ++i) {
		ir_entity *old_ent = get_class_member(from_frame, i);
		ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
		set_entity_link(old_ent, new_ent);
	}
}
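
/* Typical usage of inline_method() (a sketch; this mirrors what
 * inline_small_irgs() further below does): the Phi/Proj lists must be up
 * to date and the required resources reserved around the call:
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     inline_method(call, callee_irg);
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 */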

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
	ir_node       *pre_call;
	ir_node       *post_call, *post_bl;
	ir_node       *in[pn_Start_max];
	ir_node       *end, *end_bl, *block;
	ir_node       **res_pred;
	ir_node       **cf_pred;
	ir_node       **args_in;
	ir_node       *ret, *phi;
	int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
	int           irn_arity, n_params;
	int           n_mem_phi;
	enum exc_mode exc_handling;
	ir_type       *mtp;
	ir_type       *ctp;
	ir_entity     *ent;
	ir_graph      *rem;
	ir_graph      *irg = get_irn_irg(call);

	/* we cannot inline some types of calls */
	if (! can_inline(call, called_graph))
		return 0;

	/* We cannot inline a recursive call. The graph must be copied before
	 * the call to inline_method() using create_irg_copy(). */
	if (called_graph == irg)
		return 0;

	ent      = get_irg_entity(called_graph);
	mtp      = get_entity_type(ent);
	ctp      = get_Call_type(call);
	n_params = get_method_n_params(mtp);
	n_res    = get_method_n_ress(mtp);

	rem = current_ir_graph;
	current_ir_graph = irg;

	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

	/* optimizations can cause problems when allocating new nodes */
	rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	assert(get_irg_pinned(irg) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	set_irg_outs_inconsistent(irg);
	set_irg_extblk_inconsistent(irg);
	set_irg_doms_inconsistent(irg);
	set_irg_loopinfo_inconsistent(irg);
	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
	set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
	edges_deactivate(irg);

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);

	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handling:
	   exc_handler:    There is a handler.
	   exc_no_handler: Exception handling is not represented in Firm. -- */
	{
		ir_node *Xproj = NULL;
		ir_node *proj;
		for (proj = (ir_node*)get_irn_link(call); proj != NULL;
		     proj = (ir_node*)get_irn_link(proj)) {
			long proj_nr = get_Proj_proj(proj);
			if (proj_nr == pn_Call_X_except) Xproj = proj;
		}
		exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
	}

	/* create the argument tuple */
	args_in = ALLOCAN(ir_node*, n_params);

	block = get_nodes_block(call);
	for (i = n_params - 1; i >= 0; --i) {
		ir_node *arg      = get_Call_param(call, i);
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_mode *mode     = get_type_mode(param_tp);

		if (mode != get_irn_mode(arg)) {
			arg = new_r_Conv(block, arg, mode);
		}
		args_in[i] = arg;
	}

	/* Pre_call is a Tuple that mimics the outputs of the Start node of the
	 * called procedure and later replaces that Start node.
	 * Post_call is the old Call node and collects the results of the called
	 * graph. Both will end up being a tuple. */
	post_bl = get_nodes_block(call);
	/* XxMxPxPxPxT of Start + parameter of Call */
	in[pn_Start_M]              = get_Call_mem(call);
	in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
	in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
	pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
	post_call = call;

	/* --
	   The new block gets the ins of the old block, pre_call and all its
	   predecessors and all Phi nodes. -- */
	part_block(pre_call);

	/* increment visited flag for later walk */
	inc_irg_visited(called_graph);

	/* Link some nodes to nodes in the current graph so that instead of
	 * copying them, the linked nodes will be used.
	 * This way the copier will use the created Tuple instead of copying the
	 * Start node, and similarly for singleton nodes like NoMem and Bad.
	 * Note: this will prohibit predecessors from being copied - only do it
	 *       for nodes without predecessors */
	{
		ir_node *start_block;
		ir_node *start;
		ir_node *bad;
		ir_node *nomem;

		start_block = get_irg_start_block(called_graph);
		set_new_node(start_block, get_nodes_block(pre_call));
		mark_irn_visited(start_block);

		start = get_irg_start(called_graph);
		set_new_node(start, pre_call);
		mark_irn_visited(start);

		bad = get_irg_bad(called_graph);
		set_new_node(bad, get_irg_bad(irg));
		mark_irn_visited(bad);

		nomem = get_irg_no_mem(called_graph);
		set_new_node(nomem, get_irg_no_mem(irg));
		mark_irn_visited(nomem);
	}

	/* the entity link is used to link entities on the old stack frame to
	 * the new stack frame */
	irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);

	/* copy entities and nodes */
	assert(!irn_visited(get_irg_end(called_graph)));
	copy_frame_entities(called_graph, irg);
	irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
	              irg);

	irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);

	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   -1: Block of Tuple.
	   0: Phi of all Memories of Return statements.
	   1: Jmp from new Block that merges the control flow from all exception
	      predecessors of the old end block.
	   2: Tuple of all arguments.
	   3: Phi of Exception memories.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories.
	*/

	/* Precompute some values */
	end_bl = get_new_node(get_irg_end_block(called_graph));
	end    = get_new_node(get_irg_end(called_graph));
	arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
	n_res  = get_method_n_ress(get_Call_type(call));

	res_pred = XMALLOCN(ir_node*, n_res);
	cf_pred  = XMALLOCN(ir_node*, arity);

	/* preserve the keep-alives of the inlined graph */
	irn_arity = get_irn_arity(end);
	for (i = 0; i < irn_arity; i++) {
		ir_node *ka = get_End_keepalive(end, i);
		if (! is_Bad(ka))
			add_End_keepalive(get_irg_end(irg), ka);
	}

	/* replace Return nodes by Jmp nodes */
	n_ret = 0;
	for (i = 0; i < arity; i++) {
		ir_node *ret;
		ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			ir_node *block = get_nodes_block(ret);
			cf_pred[n_ret] = new_r_Jmp(block);
			n_ret++;
		}
	}
	set_irn_in(post_bl, n_ret, cf_pred);

	/* build a Tuple for all results of the method.
	 * Add Phi nodes if there was more than one Return. */
	turn_into_tuple(post_call, pn_Call_max);
	/* First the Memory-Phi */
	n_mem_phi = 0;
	for (i = 0; i < arity; i++) {
		ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_mem_phi++] = get_Return_mem(ret);
		}
		/* memory output for some exceptions is directly connected to End */
		if (is_Call(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
		} else if (is_fragile_op(ret)) {
			/* We rely on all cfops having the memory output at the same position. */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
		} else if (is_Raise(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
		}
	}
	phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
	set_Tuple_pred(call, pn_Call_M, phi);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(phi) == post_bl) {
		set_irn_link(phi, get_irn_link(post_bl));
		set_irn_link(post_bl, phi);
	}
	/* Now the real results */
	if (n_res > 0) {
		ir_node *result_tuple;
		for (j = 0; j < n_res; j++) {
			ir_type *res_type = get_method_res_type(ctp, j);
			ir_mode *res_mode = get_type_mode(res_type);
			n_ret = 0;
			for (i = 0; i < arity; i++) {
				ret = get_Block_cfgpred(end_bl, i);
				if (is_Return(ret)) {
					ir_node *res = get_Return_res(ret, j);
					if (get_irn_mode(res) != res_mode) {
						ir_node *block = get_nodes_block(res);
						res = new_r_Conv(block, res, res_mode);
					}
					cf_pred[n_ret] = res;
					n_ret++;
				}
			}
			if (n_ret > 0) {
				ir_mode *mode = get_irn_mode(cf_pred[0]);
				phi = new_r_Phi(post_bl, n_ret, cf_pred, mode);
			} else {
				phi = new_r_Bad(irg);
			}
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_Phi_next(phi, get_Block_phis(post_bl));
				set_Block_phis(post_bl, phi);
			}
		}
		result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
		set_Tuple_pred(call, pn_Call_T_result, result_tuple);
	} else {
		set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg));
	}
	/* handle the regular call */
	set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

	/* Finally the exception control flow.
	   We have two possible situations:
	   First, the Call branches to an exception handler: we need to add a Phi
	   node to collect the memory containing the exception objects.  Further
	   we need to add another block to get a correct representation of this
	   Phi.  To this block we add a Jmp that resolves into the X output of
	   the Call when the Call is turned into a tuple.
	   Second: There is no exception edge. Just add all inlined exception
	   branches to the End node.
	 */
	if (exc_handling == exc_handler) {
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *ret, *irn;
			ret = get_Block_cfgpred(end_bl, i);
			irn = skip_Proj(ret);
			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc > 0) {
			if (n_exc == 1) {
				/* simple fix */
				set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
			} else {
				ir_node *block = new_r_Block(irg, n_exc, cf_pred);
				set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
			}
		} else {
			set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
		}
	} else {
		ir_node *main_end_bl;
		int main_end_bl_arity;
		ir_node **end_preds;

		/* exc_handling == exc_no_handler: add all inlined exception
		   branches to the end block of the caller */
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);

			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				n_exc++;
			}
		}
		main_end_bl       = get_irg_end_block(irg);
		main_end_bl_arity = get_irn_arity(main_end_bl);
		end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

		for (i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
		free(end_preds);
	}
	free(res_pred);
	free(cf_pred);

	/* -- Turn the optimizations back on. -- */
	set_optimize(rem_opt);
	current_ir_graph = rem;

	return 1;
}

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
	ir_node    *call;       /**< The Call node. */
	ir_graph   *callee;     /**< The callee IR-graph. */
	list_head  list;        /**< List head for linking the next one. */
	int        loop_depth;  /**< The loop depth of this call. */
	int        benefice;    /**< The calculated benefice of this call. */
	unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
	unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * environment for inlining small irgs
 */
typedef struct inline_env_t {
	struct obstack obst;  /**< An obstack where call_entries are allocated on. */
	list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
	ir_node *addr;

	addr = get_Call_ptr(call);
	if (is_Global(addr)) {
		ir_entity *ent = get_Global_entity(addr);
		/* we don't know which function finally gets bound to a weak symbol */
		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
			return NULL;

		return get_entity_irg(ent);
	}

	return NULL;
}
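
/* Note (illustration, not from the original sources): only direct calls
 * through a Global/SymConst address are recognized here.  An indirect call,
 * e.g.
 *
 *     int (*fp)(int) = pick_function();   // pick_function() is made up
 *     fp(42);   // Call ptr is not a Global -> get_call_called_irg() == NULL
 *
 * yields NULL and is therefore never considered for inlining. */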

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
	if (is_Call(call)) {
		ir_graph *called_irg = get_call_called_irg(call);

		if (called_irg != NULL) {
			/* The Call node calls a locally defined method.  Remember to inline. */
			inline_env_t *ienv  = (inline_env_t*)env;
			call_entry   *entry = OALLOC(&ienv->obst, call_entry);
			entry->call       = call;
			entry->callee     = called_irg;
			entry->loop_depth = 0;
			entry->benefice   = 0;
			entry->local_adr  = 0;
			entry->all_const  = 0;

			list_add_tail(&entry->list, &ienv->calls);
		}
	}
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
	ir_graph *rem = current_ir_graph;
	inline_env_t env;
	call_entry *entry;

	current_ir_graph = irg;
	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	free_callee_info(irg);

	/* Find Call nodes to inline.
	   (We cannot inline during a walk of the graph, as inlining the same
	   method several times changes the visited flag of the walked graph:
	   after the first inlining, visited of the callee equals visited of
	   the caller.  With the next inlining both are increased.) */
	obstack_init(&env.obst);
	INIT_LIST_HEAD(&env.calls);
	irg_walk_graph(irg, NULL, collect_calls, &env);

	if (! list_empty(&env.calls)) {
		/* There are calls to inline */
		ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		collect_phiprojs(irg);

		list_for_each_entry(call_entry, entry, &env.calls, list) {
			ir_graph            *callee = entry->callee;
			irg_inline_property prop    = get_irg_inline_property(callee);

			if (prop == irg_inline_forbidden) {
				continue;
			}

			if (prop >= irg_inline_forced ||
			    _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
				inline_method(entry->call, callee);
			}
		}
		ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}
	obstack_free(&env.obst, NULL);
	current_ir_graph = rem;
}

typedef struct inline_small_irgs_pass_t {
	ir_graph_pass_t pass;
	int             size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
	inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

	inline_small_irgs(irg, pass->size);
	return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
	inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

	pass->size = size;
	return def_graph_pass_constructor(
		&pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}
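
/* Usage sketch (assumption: a driver that runs graph passes; the threshold
 * of 50 is only an example value, not a recommendation):
 *
 *     ir_graph_pass_t *p = inline_small_irgs_pass(NULL, 50);
 *     // ... hand p to a graph pass manager, or simply call
 *     // inline_small_irgs(irg, 50) directly on one graph.
 */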

/**
 * Environment for inlining irgs.
 */
typedef struct {
	list_head calls;             /**< List of all call nodes in this graph. */
	unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
	unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
	unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
	unsigned  n_nodes_orig;      /**< for statistics */
	unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
	unsigned  n_call_nodes_orig; /**< for statistics */
	unsigned  n_callers;         /**< Number of known graphs that call this graph. */
	unsigned  n_callers_orig;    /**< for statistics */
	unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
	unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
	inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
	INIT_LIST_HEAD(&env->calls);
	env->local_weights     = NULL;
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_blocks          = -2; /* do not count Start, End Block */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	env->recursive         = 0;
	return env;
}

typedef struct walker_env {
	inline_irg_env *x;     /**< the inline environment */
	char ignore_runtime;   /**< the ignore runtime flag */
	char ignore_callers;   /**< if set, do NOT update the callers data */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
	wenv_t         *env = (wenv_t*)ctx;
	inline_irg_env *x = env->x;
	unsigned        code = get_irn_opcode(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
		if (code != iro_Block) {
			++x->n_nodes;
			++x->n_nodes_orig;
		} else {
			++x->n_blocks;
		}
	}

	if (code != iro_Call) return;

	/* check if it's a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_Global(symc)) {
			ir_entity *ent = get_Global_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee != NULL) {
		if (! env->ignore_callers) {
			inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
			/* count all static callers */
			++callee_env->n_callers;
			++callee_env->n_callers_orig;
		}
		if (callee == current_ir_graph)
			x->recursive = 1;

		/* link it in the list of possibly inlinable entries */
		entry = OALLOC(&temp_obst, call_entry);
		entry->call       = call;
		entry->callee     = callee;
		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
		entry->benefice   = 0;
		entry->local_adr  = 0;
		entry->all_const  = 0;

		list_add_tail(&entry->list, &x->calls);
	}
}

/**
 * Returns TRUE if the number of call nodes is 0 in the irg's environment,
 * hence this irg is a leaf: it calls no other graphs.
 */
inline static int is_leave(ir_graph *irg)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
	return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
	call_entry *nentry = OALLOC(&temp_obst, call_entry);
	nentry->call       = new_call;
	nentry->callee     = entry->callee;
	nentry->benefice   = entry->benefice;
	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
	nentry->local_adr  = entry->local_adr;
	nentry->all_const  = entry->all_const;

	return nentry;
}

/**
 * Append all call nodes of the source environment to the list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
	call_entry *entry, *nentry;

	/* Note that the src list points to Call nodes in the inlined graph, but
	   we need Call nodes in our graph. Luckily the inliner leaves this information
	   in the link field. */
	list_for_each_entry(call_entry, entry, &src->calls, list) {
		nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
		list_add_tail(&nentry->list, &dst->calls);
	}
	dst->n_call_nodes += src->n_call_nodes;
	dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
	inline_irg_env   *env;
	ir_graph         *irg;
	size_t           i, n_irgs;
	ir_graph         *rem;
	int              did_inline;
	wenv_t           wenv;
	call_entry       *entry, *next;
	const call_entry *centry;
	pmap             *copied_graphs;
	pmap_entry       *pm_entry;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = ignore_runtime;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = get_irp_irg(i);

		assert(get_irg_phase_state(irg) != phase_building);
		free_callee_info(irg);

		assure_cf_loop(irg);
		wenv.x = (inline_irg_env*)get_irg_link(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}

	/* -- and now inline. -- */

	/* Inline leaf functions recursively -- we might construct new leaves. */
	do {
		did_inline = 0;

		for (i = 0; i < n_irgs; ++i) {
			ir_node *call;
			int phiproj_computed = 0;

			current_ir_graph = get_irp_irg(i);
			env              = (inline_irg_env*)get_irg_link(current_ir_graph);

			ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
			list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
				ir_graph            *callee;
				irg_inline_property  prop;

				if (env->n_nodes > maxsize)
					break;

				call   = entry->call;
				callee = entry->callee;

				prop = get_irg_inline_property(callee);
				if (prop == irg_inline_forbidden) {
					continue;
				}

				if (is_leave(callee) && (
				    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
					if (!phiproj_computed) {
						phiproj_computed = 1;
						collect_phiprojs(current_ir_graph);
					}
					did_inline = inline_method(call, callee);

					if (did_inline) {
						inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

						/* call was inlined, Phi/Projs for current graph must be recomputed */
						phiproj_computed = 0;

						/* Do some statistics */
						env->got_inline = 1;
						--env->n_call_nodes;
						env->n_nodes += callee_env->n_nodes;
						--callee_env->n_callers;

						/* remove this call from the list */
						list_del(&entry->list);
						continue;
					}
				}
			}
			ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		}
	} while (did_inline);

	/* inline other small functions. */
	for (i = 0; i < n_irgs; ++i) {
		ir_node *call;
		int phiproj_computed = 0;

		current_ir_graph = get_irp_irg(i);
		env              = (inline_irg_env*)get_irg_link(current_ir_graph);

		ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

		/* note that the list of possible calls is updated during the process */
		list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
			irg_inline_property prop;
			ir_graph            *callee;
			pmap_entry          *e;

			call   = entry->call;
			callee = entry->callee;

			prop = get_irg_inline_property(callee);
			if (prop == irg_inline_forbidden) {
				continue;
			}

			e = pmap_find(copied_graphs, callee);
			if (e != NULL) {
				/*
				 * Remap callee if we have a copy.
				 * FIXME: Should we do this only for recursive Calls ?
				 */
				callee = (ir_graph*)e->value;
			}

			if (prop >= irg_inline_forced ||
			    (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
				if (current_ir_graph == callee) {
					/*
					 * Recursive call: we cannot directly inline because we cannot
					 * walk the graph and change it. So we have to make a copy of
					 * the graph first.
					 */

					inline_irg_env *callee_env;
					ir_graph       *copy;

					ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/*
					 * No copy yet, create one.
					 * Note that recursive methods are never leaves, so it is
					 * sufficient to test this condition here.
					 */
					copy = create_irg_copy(callee);

					/* create_irg_copy() destroys the Proj links, recompute them */
					phiproj_computed = 0;

					ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/* allocate new environment */
					callee_env = alloc_inline_irg_env();
					set_irg_link(copy, callee_env);

					assure_cf_loop(copy);
					wenv.x              = callee_env;
					wenv.ignore_callers = 1;
					irg_walk_graph(copy, NULL, collect_calls2, &wenv);

					/*
					 * Enter the entity of the original graph. This is needed
					 * for inline_method(). However, note that ent->irg still points
					 * to callee, NOT to copy.
					 */
					set_irg_entity(copy, get_irg_entity(callee));

					pmap_insert(copied_graphs, callee, copy);
					callee = copy;

					/* we have only one caller: the original graph */
					callee_env->n_callers      = 1;
					callee_env->n_callers_orig = 1;
				}
				if (! phiproj_computed) {
					phiproj_computed = 1;
					collect_phiprojs(current_ir_graph);
				}
				did_inline = inline_method(call, callee);
				if (did_inline) {
					inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

					/* call was inlined, Phi/Projs for current graph must be recomputed */
					phiproj_computed = 0;

					/* callee was inlined. Append its call list. */
					env->got_inline = 1;
					--env->n_call_nodes;
					append_call_list(env, callee_env, entry->loop_depth);
					--callee_env->n_callers;

					/* after we have inlined callee, all called methods inside
					   callee are now called once more */
					list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
						inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
						++penv->n_callers;
					}

					/* remove this call from the list */
					list_del(&entry->list);
					continue;
				}
			}
		}
		ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}

	for (i = 0; i < n_irgs; ++i) {
		irg = get_irp_irg(i);
		env = (inline_irg_env*)get_irg_link(irg);

		if (env->got_inline) {
			optimize_graph_df(irg);
			optimize_cf(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			env->n_callers_orig, env->n_callers,
			get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}

typedef struct inline_leave_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	unsigned       leavesize;
	unsigned       size;
	int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
	inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

	(void)irp;
	inline_leave_functions(
		pass->maxsize, pass->leavesize,
		pass->size, pass->ignore_runtime);
	return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
	const char *name, unsigned maxsize, unsigned leavesize,
	unsigned size, int ignore_runtime)
{
	inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

	pass->maxsize        = maxsize;
	pass->leavesize      = leavesize;
	pass->size           = size;
	pass->ignore_runtime = ignore_runtime;

	return def_prog_pass_constructor(
		&pass->pass,
		name ? name : "inline_leave_functions",
		inline_leave_functions_wrapper);
}
1244
1245 /**
1246  * Calculate the parameter weights for transmitting the address of a local variable.
1247  */
1248 static unsigned calc_method_local_weight(ir_node *arg)
1249 {
1250         int      i, j, k;
1251         unsigned v, weight = 0;
1252
1253         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1254                 ir_node *succ = get_irn_out(arg, i);
1255
1256                 switch (get_irn_opcode(succ)) {
1257                 case iro_Load:
1258                 case iro_Store:
1259                         /* Loads and Store can be removed */
1260                         weight += 3;
1261                         break;
1262                 case iro_Sel:
1263                         /* check if all args are constant */
1264                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1265                                 ir_node *idx = get_Sel_index(succ, j);
1266                                 if (! is_Const(idx))
1267                                         return 0;
1268                         }
1269                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1270                            some unsupported node. */
1271                         v = calc_method_local_weight(succ);
1272                         if (v == 0)
1273                                 return 0;
1274                         /* we can kill one Sel with constant indexes, this is cheap */
1275                         weight += v + 1;
1276                         break;
1277                 case iro_Id:
1278                         /* when looking backward we might find Id nodes */
1279                         weight += calc_method_local_weight(succ);
1280                         break;
1281                 case iro_Tuple:
1282                         /* unoptimized tuple */
1283                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1284                                 ir_node *pred = get_Tuple_pred(succ, j);
1285                                 if (pred == arg) {
1286                                         /* look for Proj(j) */
1287                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1288                                                 ir_node *succ_succ = get_irn_out(succ, k);
1289                                                 if (is_Proj(succ_succ)) {
1290                                                         if (get_Proj_proj(succ_succ) == j) {
1291                                                                 /* found */
1292                                                                 weight += calc_method_local_weight(succ_succ);
1293                                                         }
1294                                                 } else {
1295                                                         /* this should NOT happen */
1296                                                         return 0;
1297                                                 }
1298                                         }
1299                                 }
1300                         }
1301                         break;
1302                 default:
1303                         /* any other user is not supported (yet): assume no benefit */
1304                         return 0;
1305                 }
1306         }
1307         return weight;
1308 }
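
/*
 * Worked example for calc_method_local_weight(): an address argument whose
 * only users are one Load and one Store yields weight 3 + 3 = 6.  A Sel
 * with constant indices whose single user is a Load yields 3 + 1 = 4 (the
 * Load plus the removable Sel itself).
 */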
1309
1310 /**
1311  * Calculate and cache, per graph, the parameter weights for transmitting local variable addresses.
1312  */
1313 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1314 {
1315         ir_entity *ent = get_irg_entity(irg);
1316         ir_type  *mtp;
1317         size_t   nparams;
1318         int      i;
1319         long     proj_nr;
1320         ir_node  *irg_args, *arg;
1321
1322         mtp      = get_entity_type(ent);
1323         nparams  = get_method_n_params(mtp);
1324
1325         /* allocate a new array; its existence doubles as the 'already analysed' flag */
1326         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1327
1328         /* If the method has no parameters, there is nothing to do. */
1329         if (nparams == 0)
1330                 return;
1331
1332         assure_irg_outs(irg);
1333         irg_args = get_irg_args(irg);
1334         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1335                 arg     = get_irn_out(irg_args, i);
1336                 proj_nr = get_Proj_proj(arg);
1337                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1338         }
1339 }
1340
1341 /**
1342  * Calculate the benefice for transmitting a local variable address.
1343  * After inlining, the local variable might be transformed into an
1344  * SSA variable by scalar_replacement().
1345  */
1346 static unsigned get_method_local_address_weight(ir_graph *callee, size_t pos)
1347 {
1348         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1349
1350         if (env->local_weights == NULL)
1351                 analyze_irg_local_weights(env, callee);
1352
1353         if (pos < ARR_LEN(env->local_weights))
1354                 return env->local_weights[pos];
1355         return 0;
1356 }
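
/*
 * Note: the weights are computed lazily, once per callee graph, and cached
 * in the callee's inline environment; positions outside the parameter
 * range report no benefit.
 */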
1357
1358 /**
1359  * Calculate a benefice value for inlining the given call.
1360  *
1361  * @param entry      the call entry we have to inspect
1362  * @param callee     the called graph
1363  */
1364 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1365 {
1366         ir_node   *call = entry->call;
1367         ir_entity *ent  = get_irg_entity(callee);
1368         ir_node   *frame_ptr;
1369         ir_type   *mtp;
1370         int       weight = 0;
1371         int       i, n_params, all_const;
1372         unsigned  cc, v;
1373         irg_inline_property prop;
1374
1375         inline_irg_env *callee_env;
1376
1377         prop = get_irg_inline_property(callee);
1378         if (prop == irg_inline_forbidden) {
1379                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1380                     call, callee));
1381                 return entry->benefice = INT_MIN;
1382         }
1383
1384         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1385                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1386                     call, callee));
1387                 return entry->benefice = INT_MIN;
1388         }
1389
1390         /* costs for every passed parameter */
1391         n_params = get_Call_n_params(call);
1392         mtp      = get_entity_type(ent);
1393         cc       = get_method_calling_convention(mtp);
1394         if (cc & cc_reg_param) {
1395                 /* register calling convention: register parameters are cheaper */
1396                 int max_regs = cc & ~cc_bits;
1397
1398                 if (max_regs < n_params)
1399                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1400                 else
1401                         weight += n_params * 2;
1402         } else {
1403                 /* parameters are passed on the stack */
1404                 weight += 5 * n_params;
1405         }
1406
1407         /* constant parameters improve the benefice */
1408         frame_ptr = get_irg_frame(current_ir_graph);
1409         all_const = 1;
1410         for (i = 0; i < n_params; ++i) {
1411                 ir_node *param = get_Call_param(call, i);
1412
1413                 if (is_Const(param)) {
1414                         weight += get_method_param_weight(ent, i);
1415                 } else {
1416                         all_const = 0;
1417                         if (is_SymConst(param))
1418                                 weight += get_method_param_weight(ent, i);
1419                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1420                                 /*
1421                                  * An address of a local variable is transmitted. After
1422                                  * inlining, scalar_replacement might be able to remove the
1423                                  * local variable, so honor this.
1424                                  */
1425                                 v = get_method_local_address_weight(callee, i);
1426                                 weight += v;
1427                                 if (v > 0)
1428                                         entry->local_adr = 1;
1429                         }
1430                 }
1431         }
1432         entry->all_const = all_const;
1433
1434         callee_env = (inline_irg_env*)get_irg_link(callee);
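        /* The callee has exactly one caller and is not externally visible:
         * after inlining, the original body can probably be removed, so
         * give a big bonus. */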
1435         if (callee_env->n_callers == 1 &&
1436             callee != current_ir_graph &&
1437             !entity_is_externally_visible(ent)) {
1438                 weight += 700;
1439         }
1440
1441         /* give a bonus for functions with one block */
1442         if (callee_env->n_blocks == 1)
1443                 weight = weight * 3 / 2;
1444
1445         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1446         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1447                 weight += 2000;
1448
1449         /* and finally for leaf functions: they do not increase the register
1450            pressure thanks to callee-save registers */
1451         if (callee_env->n_call_nodes == 0)
1452                 weight += 400;
1453
1454         /* it is important to inline calls in inner loops first */
1455         if (entry->loop_depth > 30)
1456                 weight += 30 * 1024;
1457         else
1458                 weight += entry->loop_depth * 1024;
1459
1460         /*
1461          * If all arguments are constant, that is probably a good sign: give an extra bonus
1462          */
1463         if (all_const)
1464                 weight += 1024;
1465
1466         return entry->benefice = weight;
1467 }
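
/*
 * Worked example for calc_inline_benefice() (illustrative assumptions): a
 * non-recursive leaf callee with one block and 25 nodes, called at loop
 * depth 0 from its only (local) caller with two constant stack parameters.
 * Ignoring the get_method_param_weight() contributions:
 *     5*2 (stack params) + 700 (single caller)  =  710
 *     710 * 3 / 2 (one block)                   = 1065
 *     + 2000 (small, non-recursive)             = 3065
 *     + 400 (leaf) + 1024 (all constant)        = 4489
 */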
1468
1469 typedef struct walk_env_t {
1470         ir_graph **irgs;
1471         size_t   last_irg;
1472 } walk_env_t;
1473
1474 /**
1475  * Callgraph walker, collect all visited graphs.
1476  */
1477 static void callgraph_walker(ir_graph *irg, void *data)
1478 {
1479         walk_env_t *env = (walk_env_t *)data;
1480         env->irgs[env->last_irg++] = irg;
1481 }
1482
1483 /**
1484  * Creates an inline order for all graphs.
1485  *
1486  * @return the list of graphs.
1487  */
1488 static ir_graph **create_irg_list(void)
1489 {
1490         ir_entity  **free_methods;
1491         size_t     n_irgs = get_irp_n_irgs();
1492         walk_env_t env;
1493
1494         cgana(&free_methods);
1495         xfree(free_methods);
1496
1497         compute_callgraph();
1498
1499         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1500         env.last_irg = 0;
1501
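        /* The walker is passed as the post function, so each graph is visited
         * after its callees: the resulting list is roughly bottom-up (modulo
         * cycles in the callgraph). */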
1502         callgraph_walk(NULL, callgraph_walker, &env);
1503         assert(n_irgs == env.last_irg);
1504
1505         return env.irgs;
1506 }
1507
1508 /**
1509  * Push a call onto the priority list if its benefice is big enough.
1510  *
1511  * @param pqueue   the priority queue of calls
1512  * @param call     the call entry
1513  * @param inline_threshold
1514  *                 the threshold value
1515  */
1516 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1517                             int inline_threshold)
1518 {
1519         ir_graph            *callee  = call->callee;
1520         irg_inline_property prop     = get_irg_inline_property(callee);
1521         int                 benefice = calc_inline_benefice(call, callee);
1522
1523         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1524             get_irn_irg(call->call), call->call, callee, benefice));
1525
1526         if (prop < irg_inline_forced && benefice < inline_threshold) {
1527                 return;
1528         }
1529
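        /* The benefice doubles as the priority: the most profitable calls are
         * popped first.  Note that graphs marked irg_inline_forced are queued
         * regardless of the threshold. */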
1530         pqueue_put(pqueue, call, benefice);
1531 }
1532
1533 /**
1534  * Try to inline calls into a graph.
1535  *
1536  * @param irg      the graph into which we inline
1537  * @param maxsize  do NOT inline if the size of irg gets
1538  *                 bigger than this amount
1539  * @param inline_threshold
1540  *                 threshold value for inline decision
1541  * @param copied_graphs
1542  *                 map containing copies of recursive graphs
1543  */
1544 static void inline_into(ir_graph *irg, unsigned maxsize,
1545                         int inline_threshold, pmap *copied_graphs)
1546 {
1547         int            phiproj_computed = 0;
1548         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1549         call_entry     *curr_call;
1550         wenv_t         wenv;
1551         pqueue_t       *pqueue;
1552
1553         if (env->n_call_nodes == 0)
1554                 return;
1555
1556         if (env->n_nodes > maxsize) {
1557                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1558                 return;
1559         }
1560
1561         current_ir_graph = irg;
1562         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1563
1564         /* put all calls of this graph into the priority queue */
1565         pqueue = new_pqueue();
1566
1567         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1568                 assert(is_Call(curr_call->call));
1569                 maybe_push_call(pqueue, curr_call, inline_threshold);
1570         }
1571
1572         /* note that the list of possible calls is updated during the process */
1573         while (!pqueue_empty(pqueue)) {
1574                 int                 did_inline;
1575                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1576                 ir_graph            *callee     = curr_call->callee;
1577                 ir_node             *call_node  = curr_call->call;
1578                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1579                 irg_inline_property prop        = get_irg_inline_property(callee);
1580                 int                 loop_depth;
1581                 const call_entry    *centry;
1582                 pmap_entry          *e;
1583
1584                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1585                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1586                                                 env->n_nodes, callee, callee_env->n_nodes));
1587                         continue;
1588                 }
1589
1590                 e = pmap_find(copied_graphs, callee);
1591                 if (e != NULL) {
1592                         int benefice = curr_call->benefice;
1593                         /*
1594                          * Reduce the benefice for recursive functions if not all arguments are constant:
1595                          * inlining recursive functions is rarely profitable.
1596                          */
1597                         if (!curr_call->all_const)
1598                                 benefice -= 2000;
1599                         if (benefice < inline_threshold)
1600                                 continue;
1601
1602                         /*
1603                          * Remap callee if we have a copy.
1604                          */
1605                         callee     = (ir_graph*)e->value;
1606                         callee_env = (inline_irg_env*)get_irg_link(callee);
1607                 }
1608
1609                 if (current_ir_graph == callee) {
1610                         /*
1611                          * Recursive call: we cannot directly inline because we cannot
1612                          * walk the graph and change it. So we have to make a copy of
1613                          * the graph first.
1614                          */
1615                         int benefice = curr_call->benefice;
1616                         ir_graph *copy;
1617
1618                         /*
1619                          * Reduce the benefice for recursive functions if not all arguments are constant:
1620                          * inlining recursive functions is rarely profitable.
1621                          */
1622                         if (!curr_call->all_const)
1623                                 benefice -= 2000;
1624                         if (benefice < inline_threshold)
1625                                 continue;
1626
1627                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1628
1629                         /*
1630                          * No copy yet, create one.
1631                          * Note that recursive methods are never leaf functions, so it
1632                          * is sufficient to test this condition here.
1633                          */
1634                         copy = create_irg_copy(callee);
1635
1636                         /* create_irg_copy() destroys the Proj links, recompute them */
1637                         phiproj_computed = 0;
1638
1639                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1640
1641                         /* allocate a new environment */
1642                         callee_env = alloc_inline_irg_env();
1643                         set_irg_link(copy, callee_env);
1644
1645                         assure_cf_loop(copy);
1646                         wenv.x              = callee_env;
1647                         wenv.ignore_callers = 1;
1648                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1649
1650                         /*
1651                          * Enter the entity of the original graph. This is needed
1652                          * for inline_method(). However, note that ent->irg still points
1653                          * to callee, NOT to copy.
1654                          */
1655                         set_irg_entity(copy, get_irg_entity(callee));
1656
1657                         pmap_insert(copied_graphs, callee, copy);
1658                         callee = copy;
1659
1660                         /* we have only one caller: the original graph */
1661                         callee_env->n_callers      = 1;
1662                         callee_env->n_callers_orig = 1;
1663                 }
1664                 if (! phiproj_computed) {
1665                         phiproj_computed = 1;
1666                         collect_phiprojs(current_ir_graph);
1667                 }
1668                 did_inline = inline_method(call_node, callee);
1669                 if (!did_inline)
1670                         continue;
1671
1672                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1673                 phiproj_computed = 0;
1674
1675                 /* remove it from the caller list */
1676                 list_del(&curr_call->list);
1677
1678                 /* callee was inlined; its call list is appended below */
1679                 env->got_inline = 1;
1680                 --env->n_call_nodes;
1681
1682                 /* we just generated a bunch of new calls */
1683                 loop_depth = curr_call->loop_depth;
1684                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1685                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1686                         ir_node        *new_call;
1687                         call_entry     *new_entry;
1688
1689                         /* after we have inlined callee, all called methods inside
1690                          * callee are now called once more */
1691                         ++penv->n_callers;
1692
1693                         /* Note that the src list points to Call nodes in the inlined graph,
1694                          * but we need Call nodes in our graph. Luckily the inliner leaves
1695                          * this information in the link field. */
1696                         new_call = (ir_node*)get_irn_link(centry->call);
1697                         assert(is_Call(new_call));
1698
1699                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1700                         list_add_tail(&new_entry->list, &env->calls);
1701                         maybe_push_call(pqueue, new_entry, inline_threshold);
1702                 }
1703
1704                 env->n_call_nodes += callee_env->n_call_nodes;
1705                 env->n_nodes += callee_env->n_nodes;
1706                 --callee_env->n_callers;
1707         }
1708         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1709         del_pqueue(pqueue);
1710 }
1711
1712 /*
1713  * Heuristic inliner. Calculates a benefice value for every call and inlines
1714  * those calls with a value higher than the threshold.
1715  */
1716 void inline_functions(unsigned maxsize, int inline_threshold,
1717                       opt_ptr after_inline_opt)
1718 {
1719         inline_irg_env   *env;
1720         size_t           i, n_irgs;
1721         ir_graph         *rem;
1722         wenv_t           wenv;
1723         pmap             *copied_graphs;
1724         pmap_entry       *pm_entry;
1725         ir_graph         **irgs;
1726
1727         rem = current_ir_graph;
1728         obstack_init(&temp_obst);
1729
1730         irgs = create_irg_list();
1731
1732         /* a map for the copied graphs, used to inline recursive calls */
1733         copied_graphs = pmap_create();
1734
1735         /* extend all irgs by a temporary data structure for inlining. */
1736         n_irgs = get_irp_n_irgs();
1737         for (i = 0; i < n_irgs; ++i)
1738                 set_irg_link(irgs[i], alloc_inline_irg_env());
1739
1740         /* Pre-compute information in temporary data structure. */
1741         wenv.ignore_runtime = 0;
1742         wenv.ignore_callers = 0;
1743         for (i = 0; i < n_irgs; ++i) {
1744                 ir_graph *irg = irgs[i];
1745
1746                 free_callee_info(irg);
1747
1748                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1749                 assure_cf_loop(irg);
1750                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1751         }
1752
1753         /* -- and now inline. -- */
1754         for (i = 0; i < n_irgs; ++i) {
1755                 ir_graph *irg = irgs[i];
1756
1757                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1758         }
1759
1760         for (i = 0; i < n_irgs; ++i) {
1761                 ir_graph *irg = irgs[i];
1762
1763                 env = (inline_irg_env*)get_irg_link(irg);
1764                 if (env->got_inline && after_inline_opt != NULL) {
1765                         /* this irg got calls inlined: optimize it */
1766                         after_inline_opt(irg);
1767                 }
1768                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1769                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1770                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1771                         env->n_callers_orig, env->n_callers,
1772                         get_entity_name(get_irg_entity(irg))));
1773                 }
1774         }
1775
1776         /* kill the copied graphs: we don't need them anymore */
1777         foreach_pmap(copied_graphs, pm_entry) {
1778                 ir_graph *copy = (ir_graph*)pm_entry->value;
1779
1780                 /* reset the entity, otherwise it will be deleted in the next step ... */
1781                 set_irg_entity(copy, NULL);
1782                 free_ir_graph(copy);
1783         }
1784         pmap_destroy(copied_graphs);
1785
1786         xfree(irgs);
1787
1788         obstack_free(&temp_obst, NULL);
1789         current_ir_graph = rem;
1790 }
1791
1792 typedef struct inline_functions_pass_t {
1793         ir_prog_pass_t pass;
1794         unsigned       maxsize;
1795         int            inline_threshold;
1796         opt_ptr        after_inline_opt;
1797 } inline_functions_pass_t;
1798
1799 /**
1800  * Wrapper to run inline_functions() as an ir_prog pass.
1801  */
1802 static int inline_functions_wrapper(ir_prog *irp, void *context)
1803 {
1804         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1805
1806         (void)irp;
1807         inline_functions(pass->maxsize, pass->inline_threshold,
1808                          pass->after_inline_opt);
1809         return 0;
1810 }
1811
1812 /* create an ir_prog pass for inline_functions */
1813 ir_prog_pass_t *inline_functions_pass(
1814           const char *name, unsigned maxsize, int inline_threshold,
1815           opt_ptr after_inline_opt)
1816 {
1817         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1818
1819         pass->maxsize          = maxsize;
1820         pass->inline_threshold = inline_threshold;
1821         pass->after_inline_opt = after_inline_opt;
1822
1823         return def_prog_pass_constructor(
1824                 &pass->pass, name ? name : "inline_functions",
1825                 inline_functions_wrapper);
1826 }
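
/*
 * Usage sketch (hypothetical driver code, not part of this file; maxsize
 * and threshold values are illustrative assumptions):
 *
 *     ir_prog_pass_t *p = inline_functions_pass(NULL, 750, 0, optimize_graph_df);
 *     ...schedule p with an ir_prog pass manager and run it...
 */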
1827
1828 void firm_init_inline(void)
1829 {
1830         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1831 }