 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief   Dead node elimination and Procedure Inlining.
 * @author  Michael Beck, Goetz Lindenmaier
 */
#include "irgraph_t.h"
#include "iroptimize.h"
#include "irbackedge_t.h"
#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "iropt_dbg.h"
#include "irnodemap.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
	set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
	assert(irn_visited(old_node));
	return (ir_node*) get_irn_link(old_node);
}

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining. Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it's a local one. env must be the new irg. The copied
 * entities must be in the link fields of the old entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
	ir_graph *new_irg  = (ir_graph*) env;
	ir_node  *new_node = irn_copy_into_irg(node, new_irg);

	set_new_node(node, new_node);
	if (is_Sel(node)) {
		ir_graph  *old_irg        = get_irn_irg(node);
		ir_type   *old_frame_type = get_irg_frame_type(old_irg);
		ir_entity *old_entity     = get_Sel_entity(node);
		assert(is_Sel(new_node));

		/* use copied entities from the new frame */
		if (get_entity_owner(old_entity) == old_frame_type) {
			ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
			assert(new_entity != NULL);
			set_Sel_entity(new_node, new_entity);
		}
	} else if (is_Block(new_node)) {
		new_node->attr.block.irg.irg = new_irg;
	}
}
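
/**
 * Walker: rewires the inputs of a copied node to the copies of its
 * predecessors and moves nodes that must live in the start block
 * (e.g. constants) into the start block of the new graph.
 */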
static void set_preds_inline(ir_node *node, void *env)
{
	ir_node *new_node;

	irn_rewire_inputs(node);

	/* move constants into start block */
	new_node = get_new_node(node);
	if (is_irn_start_block_placed(new_node)) {
		ir_graph *new_irg     = (ir_graph *) env;
		ir_node  *start_block = get_irg_start_block(new_irg);
		set_nodes_block(new_node, start_block);
	}
}

/**
 * Walker: checks whether a graph uses constructs that prevent inlining
 * (taken block addresses, frame/parameter address usage, alloca).
 */
static void find_addr(ir_node *node, void *env)
{
	bool *allow_inline = (bool*)env;

	if (is_Block(node) && get_Block_entity(node)) {
		/* Currently we cannot correctly handle blocks whose address
		 * was taken. */
		*allow_inline = false;
	} else if (is_Sel(node)) {
		ir_graph *irg = current_ir_graph;
		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
			/* access to frame */
			ir_entity *ent = get_Sel_entity(node);
			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
				/* access to value_type */
				*allow_inline = false;
			}
			if (is_parameter_entity(ent)) {
				*allow_inline = false;
			}
		}
	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
		/* Refuse to inline a call to a function that uses alloca unless the
		 * user explicitly forces it, as this may change the program's memory
		 * overhead drastically when the function using alloca is called in a
		 * loop. For the GCC contained in SPEC2000, inlining into
		 * schedule_block caused it to require 2GB of RAM instead of 256MB.
		 *
		 * Sadly this is true for our implementation as well.
		 * Moreover, we cannot differentiate between alloca() and VLA yet, so
		 * this disables inlining of functions using VLA (which are completely
		 * inlinable) as well.
		 *
		 * Possible improvements:
		 * - add a flag to the Alloc node for "real" alloca() calls
		 * - add a new Stack-Restore node at the end of a function using
		 *   alloca()
		 */
		*allow_inline = false;
	}
}
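
/* Illustration (not from the original source): inlining a callee that
 * calls alloca() into a loop merges per-call stack allocations into one
 * frame that only grows, e.g.
 *
 *     void callee(size_t n) { char *buf = alloca(n); use(buf); }
 *     for (int i = 0; i < 1000; ++i)
 *         callee(4096);   // after inlining: up to ~4MB live in one frame
 *
 * where use() is a placeholder for arbitrary work on the buffer. */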

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline in two cases:
 * - calls with compound arguments
 * - graphs that take the address of a parameter
 *
 * We check these conditions here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
	ir_entity *called      = get_irg_entity(called_graph);
	ir_type   *called_type = get_entity_type(called);
	ir_type   *call_type   = get_Call_type(call);
	size_t     n_params    = get_method_n_params(called_type);
	size_t     n_arguments = get_method_n_params(call_type);
	size_t     n_res       = get_method_n_ress(called_type);
	size_t     i;
	mtp_additional_properties props = get_entity_additional_properties(called);

	if (props & mtp_property_noinline)
		return false;

	if (n_arguments != n_params) {
		/* this is a bad feature of C: without a prototype, we can
		 * call a function with fewer parameters than needed. Currently
		 * we don't support this, although we could use Unknown then. */
		return false;
	}
	if (n_res != get_method_n_ress(call_type)) {
		return false;
	}

	/* Argh, compiling C has some bad consequences:
	 * It is implementation dependent what happens in that case.
	 * We support inlining, if the bitsize of the types matches AND
	 * the same arithmetic is used. */
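	/* For instance (illustrative): a 32-bit `int` argument passed for a
	 * 32-bit `long` parameter has the same size and integer arithmetic,
	 * so the bits can simply be reused; a 32-bit `float` passed for an
	 * `int` has the same size but different arithmetic and is rejected. */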
	for (i = 0; i < n_params; ++i) {
		ir_type *param_tp = get_method_param_type(called_type, i);
		ir_type *arg_tp   = get_method_param_type(call_type, i);

		if (param_tp != arg_tp) {
			ir_mode *pmode = get_type_mode(param_tp);
			ir_mode *amode = get_type_mode(arg_tp);

			if (pmode == NULL || amode == NULL)
				return false;
			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
				return false;
			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
				return false;
			/* otherwise we can simply "reinterpret" the bits */
		}
	}
	for (i = 0; i < n_res; ++i) {
		ir_type *decl_res_tp = get_method_res_type(called_type, i);
		ir_type *used_res_tp = get_method_res_type(call_type, i);

		if (decl_res_tp != used_res_tp) {
			ir_mode *decl_mode = get_type_mode(decl_res_tp);
			ir_mode *used_mode = get_type_mode(used_res_tp);
			if (decl_mode == NULL || used_mode == NULL)
				return false;
			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
				return false;
			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
				return false;
			/* otherwise we can "reinterpret" the bits */
		}
	}

	/* check parameters for compound arguments */
	for (i = 0; i < n_params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type) || is_Array_type(p_type))
			return false;
	}

	/* check results for compound arguments */
	for (i = 0; i < n_res; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type) || is_Array_type(r_type))
			return false;
	}

	bool res = true;
	irg_walk_graph(called_graph, find_addr, NULL, &res);
	return res;
}

enum exc_mode {
	exc_handler,    /**< There is a handler. */
	exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another. Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
	ir_type *from_frame = get_irg_frame_type(from);
	ir_type *to_frame   = get_irg_frame_type(to);
	size_t   n_members  = get_class_n_members(from_frame);
	size_t   i;

	assert(from_frame != to_frame);

	for (i = 0; i < n_members; ++i) {
		ir_entity *old_ent = get_class_member(from_frame, i);
		ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
		set_entity_link(old_ent, new_ent);
		assert(!is_parameter_entity(old_ent));
	}
}

/* Inlines a method at the given call site. */
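/* Note: the Proj list of the Call and the Phi lists of the blocks must be
 * up to date (collect_phiprojs()); both the exception-Proj walk below and
 * part_block() rely on this. See inline_into(). */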
int inline_method(ir_node *const call, ir_graph *called_graph)
{
	/* we cannot inline some types of calls */
	if (!can_inline(call, called_graph))
		return 0;

	/* We cannot inline a recursive call. The graph must be copied before
	 * the call to inline_method() using create_irg_copy(). */
	ir_graph *irg = get_irn_irg(call);
	if (called_graph == irg)
		return 0;

	ir_entity *ent      = get_irg_entity(called_graph);
	ir_type   *mtp      = get_entity_type(ent);
	ir_type   *ctp      = get_Call_type(call);
	int        n_params = get_method_n_params(mtp);

	ir_graph *rem = current_ir_graph;
	current_ir_graph = irg;

	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

	/* optimizations can cause problems when allocating new nodes */
	int rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_pinned(irg) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
	                        | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	edges_deactivate(irg);

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);

	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handler:    There is a handler.
	   exc_no_handler: Exception handling not represented in Firm. -- */
	ir_node *Xproj = NULL;
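	/* walk the list of Projs built by collect_phiprojs() to find an
	 * exception Proj of the Call */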
	for (ir_node *proj = (ir_node*)get_irn_link(call); proj != NULL;
	     proj = (ir_node*)get_irn_link(proj)) {
		long proj_nr = get_Proj_proj(proj);
		if (proj_nr == pn_Call_X_except) Xproj = proj;
	}
	enum exc_mode exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;

	/* create the argument tuple */
	ir_node **args_in = ALLOCAN(ir_node*, n_params);

	ir_node *block = get_nodes_block(call);
	for (int i = n_params - 1; i >= 0; --i) {
		ir_node *arg      = get_Call_param(call, i);
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_mode *mode     = get_type_mode(param_tp);

		if (mode != get_irn_mode(arg)) {
			arg = new_r_Conv(block, arg, mode);
		}
		args_in[i] = arg;
	}

	/* pre_call is a Tuple that jointly represents all values flowing into
	 * the procedure and later replaces the Start node of the called graph.
	 * post_call is the old Call node and collects the results of the called
	 * graph. Both will end up being a tuple. */
	ir_node *post_bl = get_nodes_block(call);
	/* XxMxPxPxPxT of Start + parameter of Call */
	ir_node *in[pn_Start_max+1];
	in[pn_Start_M]              = get_Call_mem(call);
	in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
	in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
	ir_node *pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);

	/* -- The new block gets the ins of the old block, pre_call and all its
	   predecessors and all Phi nodes. -- */
	part_block(pre_call);

	/* increment visited flag for later walk */
	inc_irg_visited(called_graph);

	/* link some nodes to nodes in the current graph so instead of copying
	 * the linked nodes will get used.
	 * So the copier will use the created Tuple instead of copying the start
	 * node, similar for singleton nodes like NoMem and Bad.
	 * Note: this will prevent predecessors from being copied; only do it
	 * for nodes without predecessors */
	ir_node *start_block = get_irg_start_block(called_graph);
	set_new_node(start_block, get_nodes_block(pre_call));
	mark_irn_visited(start_block);

	ir_node *start = get_irg_start(called_graph);
	set_new_node(start, pre_call);
	mark_irn_visited(start);

	ir_node *nomem = get_irg_no_mem(called_graph);
	set_new_node(nomem, get_irg_no_mem(irg));
	mark_irn_visited(nomem);

	/* entity link is used to link entities on the old stack frame to the
	 * copied entities on the new stack frame */
	irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* copy entities and nodes */
	assert(!irn_visited(get_irg_end(called_graph)));
	copy_frame_entities(called_graph, irg);
	irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
	              irg);

	irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   pn_Call_M:         Phi of all memories reaching the end of the
	                      inlined graph.
	   pn_Call_T_result:  Tuple of all results.
	   pn_Call_X_regular: Jmp merging the regular control flow.
	   pn_Call_X_except:  Jmp from a new block that merges the control
	                      flow from all exception predecessors of the old
	                      end block.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories. */

	/* Precompute some values */
	ir_node *end_bl = get_new_node(get_irg_end_block(called_graph));
	ir_node *end    = get_new_node(get_irg_end(called_graph));
	int      arity  = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret */
	int      n_res  = get_method_n_ress(get_Call_type(call));

	ir_node **res_pred = XMALLOCN(ir_node*, n_res);
	ir_node **cf_pred  = XMALLOCN(ir_node*, arity);

	/* archive keepalives */
	int irn_arity = get_irn_arity(end);
	for (int i = 0; i < irn_arity; i++) {
		ir_node *ka = get_End_keepalive(end, i);
		if (!is_Bad(ka))
			add_End_keepalive(get_irg_end(irg), ka);
	}

	/* replace Return nodes by Jump nodes */
	int n_ret = 0;
	for (int i = 0; i < arity; i++) {
		ir_node *ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			ir_node *block = get_nodes_block(ret);
			cf_pred[n_ret] = new_r_Jmp(block);
			++n_ret;
		}
	}
	set_irn_in(post_bl, n_ret, cf_pred);

	/* build a Tuple for all results of the method.
	 * add Phi node if there was more than one Return. */
	/* First the Memory-Phi */
	int n_mem_phi = 0;
	for (int i = 0; i < arity; i++) {
		ir_node *ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_mem_phi++] = get_Return_mem(ret);
		} else if (is_Call(ret)) {
			/* memory output for some exceptions is directly connected to End */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
		} else if (is_fragile_op(ret)) {
			/* We rely on the fact that all cfops have the memory output at
			 * the same position. */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
		} else if (is_Raise(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
		}
	}
	ir_node *const call_mem = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(call_mem) == post_bl) {
		set_irn_link(call_mem, get_irn_link(post_bl));
		set_irn_link(post_bl, call_mem);
	}

	/* Now the real results */
	ir_node *call_res;
	if (n_res > 0) {
		for (int j = 0; j < n_res; j++) {
			ir_type *res_type = get_method_res_type(ctp, j);
			ir_mode *res_mode = get_type_mode(res_type);
			int n_ret = 0;
			for (int i = 0; i < arity; i++) {
				ir_node *ret = get_Block_cfgpred(end_bl, i);
				if (is_Return(ret)) {
					ir_node *res = get_Return_res(ret, j);
					if (get_irn_mode(res) != res_mode) {
						ir_node *block = get_nodes_block(res);
						res = new_r_Conv(block, res, res_mode);
					}
					cf_pred[n_ret] = res;
					++n_ret;
				}
			}
			ir_node *const phi = n_ret > 0
				? new_r_Phi(post_bl, n_ret, cf_pred, res_mode)
				: new_r_Bad(irg, res_mode);
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_Phi_next(phi, get_Block_phis(post_bl));
				set_Block_phis(post_bl, phi);
			}
		}
		call_res = new_r_Tuple(post_bl, n_res, res_pred);
	} else {
		call_res = new_r_Bad(irg, mode_T);
	}

	/* handle the regular call */
	ir_node *const call_x_reg = new_r_Jmp(post_bl);

	/* Finally the exception control flow.
	   We have two possible situations:
	   First, if the Call branches to an exception handler:
	   We need to add a Phi node to collect the memory containing the
	   exception objects. Further we need to add another block to get a
	   correct representation of this Phi. To this block we add a Jmp that
	   resolves into the X output of the Call when the Call is turned into
	   a tuple.
	   Second: There is no exception edge. Just add all inlined exception
	   branches to the End node. */
	ir_node *call_x_exc;
	if (exc_handling == exc_handler) {
		int n_exc = 0;
		for (int i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);
			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc == 1) {
			/* simple fix */
			call_x_exc = cf_pred[0];
		} else if (n_exc > 0) {
			ir_node *block = new_r_Block(irg, n_exc, cf_pred);
			call_x_exc = new_r_Jmp(block);
		} else {
			call_x_exc = new_r_Bad(irg, mode_X);
		}
	} else {
		/* assert(exc_handling == exc_no_handler || there are no exceptions) */
		int n_exc = 0;
		for (int i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);

			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		ir_node  *main_end_bl       = get_irg_end_block(irg);
		int       main_end_bl_arity = get_irn_arity(main_end_bl);
		ir_node **end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

		for (int i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (int i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		call_x_exc = new_r_Bad(irg, mode_X);
		free(end_preds);
	}
	free(res_pred);
	free(cf_pred);

	ir_node *const call_in[] = {
		[pn_Call_M]         = call_mem,
		[pn_Call_T_result]  = call_res,
		[pn_Call_X_regular] = call_x_reg,
		[pn_Call_X_except]  = call_x_exc,
	};
	turn_into_tuple(call, ARRAY_SIZE(call_in), call_in);

	/* -- Turn CSE back on. -- */
	set_optimize(rem_opt);
	current_ir_graph = rem;

	return 1;
}

/********************************************************************/
/*  Apply inlining to small methods.                                */
/********************************************************************/
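
/** A temporary obstack on which call entries and inline environments are
 *  allocated during inlining. */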
static struct obstack temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
	ir_node  *call;        /**< The Call node. */
	ir_graph *callee;      /**< The callee IR-graph. */
	list_head list;        /**< List head for linking the next one. */
	int       loop_depth;  /**< The loop depth of this call. */
	int       benefice;    /**< The calculated benefice of this call. */
	unsigned  local_adr:1; /**< Set if this call gets an address of a local variable. */
	unsigned  all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
	ir_node *addr;

	addr = get_Call_ptr(call);
	if (is_SymConst_addr_ent(addr)) {
		ir_entity *ent = get_SymConst_entity(addr);
		/* we don't know which function gets finally bound to a weak symbol */
		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
			return NULL;

		return get_entity_irg(ent);
	}

	return NULL;
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
	list_head calls;             /**< List of all call nodes in this graph. */
	unsigned *local_weights;     /**< Once allocated, the beneficial weight for transmitting local addresses. */
	unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
	unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
	unsigned  n_nodes_orig;      /**< for statistics */
	unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
	unsigned  n_call_nodes_orig; /**< for statistics */
	unsigned  n_callers;         /**< Number of known graphs that call this graph. */
	unsigned  n_callers_orig;    /**< for statistics */
	unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
	unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
	inline_irg_env *env = OALLOC(&temp_obst, inline_irg_env);
	INIT_LIST_HEAD(&env->calls);
	env->local_weights     = NULL;
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_blocks          = -2; /* do not count Start, End Block */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	env->recursive         = 0;
	return env;
}

typedef struct walker_env {
	inline_irg_env *x;              /**< the inline environment */
	char            ignore_runtime; /**< the ignore runtime flag */
	char            ignore_callers; /**< if set, do not update caller statistics */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
	wenv_t         *env  = (wenv_t*)ctx;
	inline_irg_env *x    = env->x;
	unsigned        code = get_irn_opcode(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
		if (code != iro_Block) {
			++x->n_nodes;
			++x->n_nodes_orig;
		} else {
			++x->n_blocks;
		}
	}

	if (code != iro_Call) return;

	/* check if it's a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_SymConst_addr_ent(symc)) {
			ir_entity *ent = get_SymConst_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee != NULL) {
		if (!env->ignore_callers) {
			inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
			/* count all static callers */
			++callee_env->n_callers;
			++callee_env->n_callers_orig;
		}
		if (callee == current_ir_graph)
			x->recursive = 1;

		/* link it in the list of possible inlinable entries */
		entry = OALLOC(&temp_obst, call_entry);
		entry->call       = call;
		entry->callee     = callee;
		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
		entry->benefice   = 0;
		entry->local_adr  = 0;
		entry->all_const  = 0;

		list_add_tail(&entry->list, &x->calls);
	}
}

/**
 * Duplicate a call entry.
 *
 * @param entry             the original entry to duplicate
 * @param new_call          the new call node
 * @param loop_depth_delta  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
	call_entry *nentry = OALLOC(&temp_obst, call_entry);
	nentry->call       = new_call;
	nentry->callee     = entry->callee;
	nentry->benefice   = entry->benefice;
	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
	nentry->local_adr  = entry->local_adr;
	nentry->all_const  = entry->all_const;

	return nentry;
}

/**
 * Calculate the parameter weights for transmitting the address of a local
 * variable. Returns 0 as soon as an unsupported user of the address is
 * found.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
	unsigned v, weight = 0;
	int      j;

	for (unsigned i = get_irn_n_outs(arg); i-- > 0; ) {
		ir_node *succ = get_irn_out(arg, i);

		switch (get_irn_opcode(succ)) {
		case iro_Load:
		case iro_Store:
			/* Loads and Stores can be removed */
			weight += 3;
			break;
		case iro_Sel:
			/* check if all args are constant */
			for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
				ir_node *idx = get_Sel_index(succ, j);
				if (!is_Const(idx))
					return 0;
			}
			/* Check users on this Sel. Note: if a 0 is returned here, there was
			   some unsupported node. */
			v = calc_method_local_weight(succ);
			if (v == 0)
				return 0;
			/* we can kill one Sel with constant indexes, this is cheap */
			weight += v + 1;
			break;
		case iro_Id:
			/* when looking backward we might find Id nodes */
			weight += calc_method_local_weight(succ);
			break;
		case iro_Tuple:
			/* unoptimized tuple */
			for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
				ir_node *pred = get_Tuple_pred(succ, j);
				if (pred == arg) {
					/* look for Proj(j) */
					for (unsigned k = get_irn_n_outs(succ); k-- > 0; ) {
						ir_node *succ_succ = get_irn_out(succ, k);
						if (is_Proj(succ_succ)) {
							if (get_Proj_proj(succ_succ) == j)
								weight += calc_method_local_weight(succ_succ);
						} else {
							/* this should NOT happen */
							return 0;
						}
					}
				}
			}
			break;
		default:
			/* any other node: unsupported yet or bad. */
			return 0;
		}
	}
	return weight;
}

/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
{
	ir_entity *ent = get_irg_entity(irg);
	ir_type   *mtp;
	size_t     nparams;
	long       proj_nr;
	ir_node   *irg_args, *arg;

	mtp     = get_entity_type(ent);
	nparams = get_method_n_params(mtp);

	/* allocate a new array. currently used as 'analysed' flag */
	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);

	/* If the method has no parameters we have nothing to do. */
	if (nparams <= 0)
		return;

	assure_irg_outs(irg);
	irg_args = get_irg_args(irg);
	for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
		arg     = get_irn_out(irg_args, i);
		proj_nr = get_Proj_proj(arg);
		env->local_weights[proj_nr] = calc_method_local_weight(arg);
	}
}

/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);

	if (env->local_weights == NULL)
		analyze_irg_local_weights(env, callee);

	if (pos < ARR_LEN(env->local_weights))
		return env->local_weights[pos];
	return 0;
}

/**
 * Calculate a benefice value for inlining the given call.
 *
 * @param entry   the call entry to inspect
 * @param callee  the called graph
 */
static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
{
	ir_node   *call = entry->call;
	ir_entity *ent  = get_irg_entity(callee);
	ir_type   *callee_frame;
	size_t     i, n_members, n_params;
	ir_node   *frame_ptr;
	ir_type   *mtp;
	int        weight = 0;
	int        all_const;
	unsigned   cc, v;
	inline_irg_env *callee_env;

	mtp_additional_properties props = get_entity_additional_properties(ent);
	if (props & mtp_property_noinline) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	callee_frame = get_irg_frame_type(callee);
	n_members    = get_class_n_members(callee_frame);
	for (i = 0; i < n_members; ++i) {
		ir_entity *frame_ent = get_class_member(callee_frame, i);
		if (is_parameter_entity(frame_ent)) {
			// TODO inliner should handle parameter entities by inserting Store operations
			DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
			add_entity_additional_properties(ent, mtp_property_noinline);
			return entry->benefice = INT_MIN;
		}
	}

	if (props & mtp_property_noreturn) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	/* costs for every passed parameter */
	n_params = get_Call_n_params(call);
	mtp      = get_entity_type(ent);
	cc       = get_method_calling_convention(mtp);
	if (cc & cc_reg_param) {
		/* register parameter, smaller costs for register parameters */
		size_t max_regs = cc & ~cc_bits;

		if (max_regs < n_params)
			weight += max_regs * 2 + (n_params - max_regs) * 5;
		else
			weight += n_params * 2;
	} else {
		/* parameters are passed on the stack */
		weight += 5 * n_params;
	}

	/* constant parameters improve the benefice */
	frame_ptr = get_irg_frame(current_ir_graph);
	all_const = 1;
	for (i = 0; i < n_params; ++i) {
		ir_node *param = get_Call_param(call, i);

		if (is_Const(param)) {
			weight += get_method_param_weight(ent, i);
		} else {
			all_const = 0;
			if (is_SymConst(param))
				weight += get_method_param_weight(ent, i);
			else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
				/*
				 * An address of a local variable is transmitted. After
				 * inlining, scalar_replacement might be able to remove the
				 * local variable, so honor this.
				 */
				v = get_method_local_adress_weight(callee, i);
				weight += v;
				if (v > 0)
					entry->local_adr = 1;
			}
		}
	}
	entry->all_const = all_const;

	callee_env = (inline_irg_env*)get_irg_link(callee);
	if (callee_env->n_callers == 1 &&
	    callee != current_ir_graph &&
	    !entity_is_externally_visible(ent)) {
		weight += 700;
	}

	/* give a bonus for functions with one block */
	if (callee_env->n_blocks == 1)
		weight = weight * 3 / 2;

	/* and one for small non-recursive functions: we want them to be inlined in almost every case */
	if (callee_env->n_nodes < 30 && !callee_env->recursive)
		weight += 2000;

	/* and finally for leafs: they do not increase the register pressure
	   because of callee-save registers */
	if (callee_env->n_call_nodes == 0)
		weight += 400;

	/* it's important to inline inner loops first */
	if (entry->loop_depth > 30)
		weight += 30 * 1024;
	else
		weight += entry->loop_depth * 1024;

	/*
	 * All arguments constant is probably a good sign, give an extra bonus
	 */
	if (all_const)
		weight += 1024;

	return entry->benefice = weight;
}

typedef struct walk_env_t {
	ir_graph **irgs;     /**< The collected irgs. */
	size_t     last_irg; /**< The index of the next free irgs slot. */
} walk_env_t;

/**
 * Callgraph walker, collect all visited graphs.
 */
static void callgraph_walker(ir_graph *irg, void *data)
{
	walk_env_t *env = (walk_env_t *)data;
	env->irgs[env->last_irg++] = irg;
}
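
/* callgraph_walker is used as a post-walker below, so a graph is appended
 * only after its callees have been visited: the resulting list is roughly
 * bottom-up (callees before callers, up to cycles in the callgraph). */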

/**
 * Creates an inline order for all graphs.
 *
 * @return the list of graphs.
 */
static ir_graph **create_irg_list(void)
{
	ir_entity **free_methods;
	size_t      n_irgs = get_irp_n_irgs();
	walk_env_t  env;

	cgana(&free_methods);
	xfree(free_methods);

	compute_callgraph();

	env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
	env.last_irg = 0;

	callgraph_walk(NULL, callgraph_walker, &env);
	assert(n_irgs == env.last_irg);

	free_callgraph();

	return env.irgs;
}

/**
 * Push a call onto the priority list if its benefice is big enough.
 *
 * @param pqueue            the priority queue of calls
 * @param call              the call entry
 * @param inline_threshold  the threshold value
 */
static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
                            int inline_threshold)
{
	ir_graph *callee   = call->callee;
	int       benefice = calc_inline_benefice(call, callee);

	DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
	    get_irn_irg(call->call), call->call, callee, benefice));

	ir_entity *ent = get_irg_entity(callee);
	mtp_additional_properties props = get_entity_additional_properties(ent);
	if (!(props & mtp_property_always_inline) && benefice < inline_threshold) {
		return;
	}

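	/* the benefice is used as the priority: the most beneficial calls are
	 * popped (and thus inlined) first */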
	pqueue_put(pqueue, call, benefice);
}

/**
 * Try to inline calls into a graph.
 *
 * @param irg               the graph into which we inline
 * @param maxsize           do NOT inline if the size of irg gets
 *                          bigger than this amount
 * @param inline_threshold  threshold value for inline decision
 * @param copied_graphs     map containing copies of recursive graphs
 */
static void inline_into(ir_graph *irg, unsigned maxsize,
                        int inline_threshold, pmap *copied_graphs)
{
	int phiproj_computed = 0;
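	/* nonzero while the Phi/Proj lists computed by collect_phiprojs() are
	 * valid; every successful inlining invalidates them again */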
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	wenv_t          wenv;
	pqueue_t       *pqueue;

	if (env->n_call_nodes == 0)
		return;

	if (env->n_nodes > maxsize) {
		DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
		return;
	}

	current_ir_graph = irg;
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

	/* put irgs into the pqueue */
	pqueue = new_pqueue();

	list_for_each_entry(call_entry, curr_call, &env->calls, list) {
		assert(is_Call(curr_call->call));
		maybe_push_call(pqueue, curr_call, inline_threshold);
	}

	/* note that the list of possible calls is updated during the process */
	while (!pqueue_empty(pqueue)) {
		int             did_inline;
		call_entry     *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
		ir_graph       *callee     = curr_call->callee;
		ir_node        *call_node  = curr_call->call;
		inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
		ir_entity      *ent        = get_irg_entity(callee);
		mtp_additional_properties props
			= get_entity_additional_properties(ent);
		ir_graph       *calleee;
		int             loop_depth;

		if (!(props & mtp_property_always_inline)
		    && env->n_nodes + callee_env->n_nodes > maxsize) {
			DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
			    env->n_nodes, callee, callee_env->n_nodes));
			continue;
		}

		calleee = pmap_get(ir_graph, copied_graphs, callee);
		if (calleee != NULL) {
			int benefice = curr_call->benefice;
			/*
			 * Reduce the weight for recursive functions iff not all
			 * arguments are const; inlining recursive functions is
			 * rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			/*
			 * Remap callee if we have a copy.
			 */
			callee     = calleee;
			callee_env = (inline_irg_env*)get_irg_link(callee);
		}

		if (current_ir_graph == callee) {
			/*
			 * Recursive call: we cannot directly inline because we cannot
			 * walk the graph and change it. So we have to make a copy of
			 * the graph first.
			 */
			int       benefice = curr_call->benefice;
			ir_graph *copy;

			/*
			 * Reduce the weight for recursive functions iff not all
			 * arguments are const; inlining recursive functions is
			 * rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/*
			 * No copy yet, create one.
			 * Note that recursive methods are never leafs, so it is
			 * sufficient to test this condition here.
			 */
			copy = create_irg_copy(callee);

			/* create_irg_copy() destroys the Proj links, recompute them */
			phiproj_computed = 0;

			ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/* allocate a new environment */
			callee_env = alloc_inline_irg_env();
			set_irg_link(copy, callee_env);

			assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
			memset(&wenv, 0, sizeof(wenv));
			wenv.x              = callee_env;
			wenv.ignore_callers = 1;
			irg_walk_graph(copy, NULL, collect_calls2, &wenv);

			/*
			 * Enter the entity of the original graph. This is needed
			 * for inline_method(). However, note that ent->irg still points
			 * to callee, NOT to copy.
			 */
			set_irg_entity(copy, get_irg_entity(callee));

			pmap_insert(copied_graphs, callee, copy);
			callee = copy;

			/* we have only one caller: the original graph */
			callee_env->n_callers      = 1;
			callee_env->n_callers_orig = 1;
		}

		if (!phiproj_computed) {
			phiproj_computed = 1;
			collect_phiprojs(current_ir_graph);
		}
		did_inline = inline_method(call_node, callee);
		if (!did_inline)
			continue;

		/* call was inlined, Phi/Projs for current graph must be recomputed */
		phiproj_computed = 0;

		/* remove it from the caller list */
		list_del(&curr_call->list);

		/* callee was inlined. Append its call list. */
		env->got_inline = 1;
		--env->n_call_nodes;

		/* we just generate a bunch of new calls */
		loop_depth = curr_call->loop_depth;
		list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
			inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
			ir_node        *new_call;
			call_entry     *new_entry;

			/* after we have inlined callee, all called methods inside
			 * callee are now called once more */
			++penv->n_callers;

			/* Note that the src list points to Call nodes in the inlined graph,
			 * but we need Call nodes in our graph. Luckily the inliner leaves
			 * this information in the link field. */
			new_call = (ir_node*)get_irn_link(centry->call);
			if (get_irn_irg(new_call) != irg) {
				/* centry->call has not been copied, which means it is dead.
				 * This might happen during inlining, if a const function,
				 * which cannot be inlined, is only used as an unused argument
				 * of another function, which is inlined. */
				continue;
			}
			assert(is_Call(new_call));

			new_entry = duplicate_call_entry(centry, new_call, loop_depth);
			list_add_tail(&new_entry->list, &env->calls);
			maybe_push_call(pqueue, new_entry, inline_threshold);
		}

		env->n_call_nodes += callee_env->n_call_nodes;
		env->n_nodes      += callee_env->n_nodes;
		--callee_env->n_callers;
	}
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	del_pqueue(pqueue);
}

/*
 * Heuristic inliner. Calculates a benefice value for every call and inlines
 * those calls with a value higher than the threshold.
 */
void inline_functions(unsigned maxsize, int inline_threshold,
                      opt_ptr after_inline_opt)
{
	inline_irg_env *env;
	size_t          i, n_irgs;
	ir_graph       *rem;
	wenv_t          wenv;
	pmap           *copied_graphs;
	pmap_entry     *pm_entry;
	ir_graph      **irgs;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	irgs = create_irg_list();

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(irgs[i], alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = 0;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		free_callee_info(irg);

		wenv.x = (inline_irg_env*)get_irg_link(irg);
		assure_loopinfo(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}

	/* -- and now inline. -- */
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		inline_into(irg, maxsize, inline_threshold, copied_graphs);
	}

	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		env = (inline_irg_env*)get_irg_link(irg);
		if (env->got_inline && after_inline_opt != NULL) {
			/* this irg got calls inlined: optimize it */
			after_inline_opt(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			    env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			    env->n_callers_orig, env->n_callers,
			    get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	xfree(irgs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}
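
/* A minimal usage sketch (illustrative only, not part of this file):
 * `cleanup` is a hypothetical callback; any opt_ptr, e.g. one calling
 * local_optimize_graph(), can be passed as after_inline_opt, and the size
 * and threshold values below are made-up examples.
 *
 *     static void cleanup(ir_graph *irg) { local_optimize_graph(irg); }
 *     ...
 *     inline_functions(750, 0, cleanup);
 */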

typedef struct inline_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	int            inline_threshold;
	opt_ptr        after_inline_opt;
} inline_functions_pass_t;

/**
 * Wrapper to run inline_functions() as an ir_prog pass.
 */
static int inline_functions_wrapper(ir_prog *irp, void *context)
{
	inline_functions_pass_t *pass = (inline_functions_pass_t*)context;

	(void)irp;
	inline_functions(pass->maxsize, pass->inline_threshold,
	                 pass->after_inline_opt);
	return 0;
}

/* create an ir_prog pass for inline_functions */
ir_prog_pass_t *inline_functions_pass(
	const char *name, unsigned maxsize, int inline_threshold,
	opt_ptr after_inline_opt)
{
	inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);

	pass->maxsize          = maxsize;
	pass->inline_threshold = inline_threshold;
	pass->after_inline_opt = after_inline_opt;

	return def_prog_pass_constructor(
		&pass->pass, name ? name : "inline_functions",
		inline_functions_wrapper);
}

void firm_init_inline(void)
{
	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
}