nsz Git - libfirm/blob - ir/opt/opt_inline.c

   1 /*
   2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief    Dead node elimination and Procedure Inlining.
  23  * @author   Michael Beck, Goetz Lindenmaier
  24  */
  25 #include "config.h"
  26
  27 #include <limits.h>
  28 #include <stdbool.h>
  29 #include <assert.h>
  30
  31 #include "irnode_t.h"
  32 #include "irgraph_t.h"
  33 #include "irprog_t.h"
  34
  35 #include "iroptimize.h"
  36 #include "ircons_t.h"
  37 #include "iropt_t.h"
  38 #include "irgopt.h"
  39 #include "irgmod.h"
  40 #include "irgwalk.h"
  41
  42 #include "array_t.h"
  43 #include "list.h"
  44 #include "pset.h"
  45 #include "pmap.h"
  46 #include "pdeq.h"
  47 #include "xmalloc.h"
  48 #include "pqueue.h"
  49
  50 #include "irouts.h"
  51 #include "irloop_t.h"
  52 #include "irbackedge_t.h"
  53 #include "opt_init.h"
  54 #include "cgana.h"
  55 #include "trouts.h"
  56 #include "error.h"
  57
  58 #include "analyze_irg_args.h"
  59 #include "iredges_t.h"
  60 #include "irflag_t.h"
  61 #include "irhooks.h"
  62 #include "irtools.h"
  63 #include "iropt_dbg.h"
  64 #include "irpass_t.h"
  65 #include "irnodemap.h"
  66
  67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  68
  69 /*------------------------------------------------------------------*/
  70 /* Routines for dead node elimination / copying garbage collection  */
  71 /* of the obstack.                                                  */
  72 /*------------------------------------------------------------------*/
  73
  74 /**
  75  * Remember the new node in the old node by using a field all nodes have.
  76  */
  77 static void set_new_node(ir_node *node, ir_node *new_node)
  78 {
  79         set_irn_link(node, new_node);
  80 }
  81
  82 /**
  83  * Get this new node, before the old node is forgotten.
  84  */
  85 static inline ir_node *get_new_node(ir_node *old_node)
  86 {
  87         assert(irn_visited(old_node));
  88         return (ir_node*) get_irn_link(old_node);
  89 }
  90
  91 /*--------------------------------------------------------------------*/
  92 /*  Functionality for inlining                                         */
  93 /*--------------------------------------------------------------------*/
  94
  95 /**
  96  * Copy node for inlineing.  Updates attributes that change when
  97  * inlineing but not for dead node elimination.
  98  *
  99  * Copies the node by calling copy_node() and then updates the entity if
 100  * it's a local one.  env must be a pointer of the frame type of the
 101  * inlined procedure. The new entities must be in the link field of
 102  * the entities.
 103  */
 104 static void copy_node_inline(ir_node *node, void *env)
 105 {
 106         ir_graph *new_irg  = (ir_graph*) env;
 107         ir_node  *new_node = irn_copy_into_irg(node, new_irg);
 108
 109         set_new_node(node, new_node);
 110         if (is_Sel(node)) {
 111                 ir_graph  *old_irg        = get_irn_irg(node);
 112                 ir_type   *old_frame_type = get_irg_frame_type(old_irg);
 113                 ir_entity *old_entity     = get_Sel_entity(node);
 114                 assert(is_Sel(new_node));
 115                 /* use copied entities from the new frame */
 116                 if (get_entity_owner(old_entity) == old_frame_type) {
 117                         ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
 118                         assert(new_entity != NULL);
 119                         set_Sel_entity(new_node, new_entity);
 120                 }
 121         } else if (is_Block(new_node)) {
 122                 new_node->attr.block.irg.irg = new_irg;
 123         }
 124 }
 125
 126 static void set_preds_inline(ir_node *node, void *env)
 127 {
 128         ir_node *new_node;
 129
 130         irn_rewire_inputs(node);
 131
 132         /* move constants into start block */
 133         new_node = get_new_node(node);
 134         if (is_irn_constlike(new_node)) {
 135                 ir_graph *new_irg     = (ir_graph *) env;
 136                 ir_node  *start_block = get_irg_start_block(new_irg);
 137                 set_nodes_block(new_node, start_block);
 138         }
 139 }
 140
 141 /**
 142  * Walker: checks if P_value_arg_base is used.
 143  */
 144 static void find_addr(ir_node *node, void *env)
 145 {
 146         bool *allow_inline = (bool*)env;
 147
 148         if (is_Block(node) && get_Block_entity(node)) {
 149                 /**
 150                  * Currently we can't handle blocks whose address was taken correctly
 151                  * when inlining
 152                  */
 153                 *allow_inline = false;
 154         } else if (is_Sel(node)) {
 155                 ir_graph *irg = current_ir_graph;
 156                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
 157                         /* access to frame */
 158                         ir_entity *ent = get_Sel_entity(node);
 159                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
 160                                 /* access to value_type */
 161                                 *allow_inline = false;
 162                         }
 163                         if (is_parameter_entity(ent)) {
 164                                 *allow_inline = false;
 165                         }
 166                 }
 167         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
 168                 /* From GCC:
 169                  * Refuse to inline alloca call unless user explicitly forced so as this
 170                  * may change program's memory overhead drastically when the function
 171                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
 172                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
 173                  *
 174                  * Sorrily this is true with our implementation also.
 175                  * Moreover, we cannot differentiate between alloca() and VLA yet, so
 176                  * this disables inlining of functions using VLA (which are completely
 177                  * save).
 178                  *
 179                  * 2 Solutions:
 180                  * - add a flag to the Alloc node for "real" alloca() calls
 181                  * - add a new Stack-Restore node at the end of a function using
 182                  *   alloca()
 183                  */
 184                 *allow_inline = false;
 185         }
 186 }
 187
 188 /**
 189  * Check if we can inline a given call.
 190  * Currently, we cannot inline two cases:
 191  * - call with compound arguments
 192  * - graphs that take the address of a parameter
 193  *
 194  * check these conditions here
 195  */
 196 static bool can_inline(ir_node *call, ir_graph *called_graph)
 197 {
 198         ir_entity          *called      = get_irg_entity(called_graph);
 199         ir_type            *called_type = get_entity_type(called);
 200         ir_type            *call_type   = get_Call_type(call);
 201         size_t              n_params    = get_method_n_params(called_type);
 202         size_t              n_arguments = get_method_n_params(call_type);
 203         size_t              n_res       = get_method_n_ress(called_type);
 204         irg_inline_property prop        = get_irg_inline_property(called_graph);
 205         size_t              i;
 206         bool                res;
 207
 208         if (prop == irg_inline_forbidden)
 209                 return false;
 210
 211         if (n_arguments != n_params) {
 212                 /* this is a bad feature of C: without a prototype, we can
 213                  * call a function with less parameters than needed. Currently
 214                  * we don't support this, although we could use Unknown than. */
 215                 return false;
 216         }
 217         if (n_res != get_method_n_ress(call_type)) {
 218                 return false;
 219         }
 220
 221         /* Argh, compiling C has some bad consequences:
 222          * It is implementation dependent what happens in that case.
 223          * We support inlining, if the bitsize of the types matches AND
 224          * the same arithmetic is used. */
 225         for (i = 0; i < n_params; ++i) {
 226                 ir_type *param_tp = get_method_param_type(called_type, i);
 227                 ir_type *arg_tp   = get_method_param_type(call_type, i);
 228
 229                 if (param_tp != arg_tp) {
 230                         ir_mode *pmode = get_type_mode(param_tp);
 231                         ir_mode *amode = get_type_mode(arg_tp);
 232
 233                         if (pmode == NULL || amode == NULL)
 234                                 return false;
 235                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
 236                                 return false;
 237                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
 238                                 return false;
 239                         /* otherwise we can simply "reinterpret" the bits */
 240                 }
 241         }
 242         for (i = 0; i < n_res; ++i) {
 243                 ir_type *decl_res_tp = get_method_res_type(called_type, i);
 244                 ir_type *used_res_tp = get_method_res_type(call_type, i);
 245
 246                 if (decl_res_tp != used_res_tp) {
 247                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
 248                         ir_mode *used_mode = get_type_mode(used_res_tp);
 249                         if (decl_mode == NULL || used_mode == NULL)
 250                                 return false;
 251                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
 252                                 return false;
 253                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
 254                                 return false;
 255                         /* otherwise we can "reinterpret" the bits */
 256                 }
 257         }
 258
 259         /* check parameters for compound arguments */
 260         for (i = 0; i < n_params; ++i) {
 261                 ir_type *p_type = get_method_param_type(call_type, i);
 262
 263                 if (is_compound_type(p_type))
 264                         return false;
 265         }
 266
 267         /* check results for compound arguments */
 268         for (i = 0; i < n_res; ++i) {
 269                 ir_type *r_type = get_method_res_type(call_type, i);
 270
 271                 if (is_compound_type(r_type))
 272                         return false;
 273         }
 274
 275         res = true;
 276         irg_walk_graph(called_graph, find_addr, NULL, &res);
 277
 278         return res;
 279 }
 280
 281 enum exc_mode {
 282         exc_handler,    /**< There is a handler. */
 283         exc_no_handler  /**< Exception handling not represented. */
 284 };
 285
 286 /**
 287  * copy all entities on the stack frame on 1 irg to the stackframe of another.
 288  * Sets entity links of the old entities to the copies
 289  */
 290 static void copy_frame_entities(ir_graph *from, ir_graph *to)
 291 {
 292         ir_type *from_frame = get_irg_frame_type(from);
 293         ir_type *to_frame   = get_irg_frame_type(to);
 294         size_t   n_members  = get_class_n_members(from_frame);
 295         size_t   i;
 296         assert(from_frame != to_frame);
 297
 298         for (i = 0; i < n_members; ++i) {
 299                 ir_entity *old_ent = get_class_member(from_frame, i);
 300                 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
 301                 set_entity_link(old_ent, new_ent);
 302                 assert (!is_parameter_entity(old_ent));
 303         }
 304 }
 305
 306 /* Inlines a method at the given call site. */
 307 int inline_method(ir_node *call, ir_graph *called_graph)
 308 {
 309         ir_node       *pre_call;
 310         ir_node       *post_call, *post_bl;
 311         ir_node       *in[pn_Start_max+1];
 312         ir_node       *end, *end_bl, *block;
 313         ir_node       **res_pred;
 314         ir_node       **cf_pred;
 315         ir_node       **args_in;
 316         ir_node       *ret, *phi;
 317         int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
 318         int           irn_arity, n_params;
 319         int           n_mem_phi;
 320         enum exc_mode exc_handling;
 321         ir_type       *mtp;
 322         ir_type       *ctp;
 323         ir_entity     *ent;
 324         ir_graph      *rem;
 325         ir_graph      *irg = get_irn_irg(call);
 326
 327         /* we cannot inline some types of calls */
 328         if (! can_inline(call, called_graph))
 329                 return 0;
 330
 331         /* We cannot inline a recursive call. The graph must be copied before
 332          * the call the inline_method() using create_irg_copy(). */
 333         if (called_graph == irg)
 334                 return 0;
 335
 336         ent      = get_irg_entity(called_graph);
 337         mtp      = get_entity_type(ent);
 338         ctp      = get_Call_type(call);
 339         n_params = get_method_n_params(mtp);
 340         n_res    = get_method_n_ress(mtp);
 341
 342         rem = current_ir_graph;
 343         current_ir_graph = irg;
 344
 345         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
 346
 347         /* optimizations can cause problems when allocating new nodes */
 348         rem_opt = get_opt_optimize();
 349         set_optimize(0);
 350
 351         /* Handle graph state */
 352         assert(get_irg_phase_state(irg) != phase_building);
 353         assert(get_irg_pinned(irg) == op_pin_state_pinned);
 354         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
 355         clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE
 356                            | IR_GRAPH_STATE_VALID_EXTENDED_BLOCKS
 357                            | IR_GRAPH_STATE_CONSISTENT_ENTITY_USAGE);
 358         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
 359         clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_ENTITY_USAGE);
 360         edges_deactivate(irg);
 361
 362         /* here we know we WILL inline, so inform the statistics */
 363         hook_inline(call, called_graph);
 364
 365         /* -- Decide how to handle exception control flow: Is there a handler
 366            for the Call node, or do we branch directly to End on an exception?
 367            exc_handling:
 368            0 There is a handler.
 369            2 Exception handling not represented in Firm. -- */
 370         {
 371                 ir_node *Xproj = NULL;
 372                 ir_node *proj;
 373                 for (proj = (ir_node*)get_irn_link(call); proj != NULL;
 374                      proj = (ir_node*)get_irn_link(proj)) {
 375                         long proj_nr = get_Proj_proj(proj);
 376                         if (proj_nr == pn_Call_X_except) Xproj = proj;
 377                 }
 378                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
 379         }
 380
 381         /* create the argument tuple */
 382         args_in = ALLOCAN(ir_node*, n_params);
 383
 384         block = get_nodes_block(call);
 385         for (i = n_params - 1; i >= 0; --i) {
 386                 ir_node *arg      = get_Call_param(call, i);
 387                 ir_type *param_tp = get_method_param_type(mtp, i);
 388                 ir_mode *mode     = get_type_mode(param_tp);
 389
 390                 if (mode != get_irn_mode(arg)) {
 391                         arg = new_r_Conv(block, arg, mode);
 392                 }
 393                 args_in[i] = arg;
 394         }
 395
 396         /* the procedure and later replaces the Start node of the called graph.
 397          * Post_call is the old Call node and collects the results of the called
 398          * graph. Both will end up being a tuple. */
 399         post_bl = get_nodes_block(call);
 400         /* XxMxPxPxPxT of Start + parameter of Call */
 401         in[pn_Start_M]              = get_Call_mem(call);
 402         in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
 403         in[pn_Start_P_frame_base]   = get_irg_frame(irg);
 404         in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
 405         pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
 406         post_call = call;
 407
 408         /* --
 409            The new block gets the ins of the old block, pre_call and all its
 410            predecessors and all Phi nodes. -- */
 411         part_block(pre_call);
 412
 413         /* increment visited flag for later walk */
 414         inc_irg_visited(called_graph);
 415
 416         /* link some nodes to nodes in the current graph so instead of copying
 417          * the linked nodes will get used.
 418          * So the copier will use the created Tuple instead of copying the start
 419          * node, similar for singleton nodes like NoMem and Bad.
 420          * Note: this will prohibit predecessors to be copied - only do it for
 421          *       nodes without predecessors */
 422         {
 423                 ir_node *start_block;
 424                 ir_node *start;
 425                 ir_node *nomem;
 426
 427                 start_block = get_irg_start_block(called_graph);
 428                 set_new_node(start_block, get_nodes_block(pre_call));
 429                 mark_irn_visited(start_block);
 430
 431                 start = get_irg_start(called_graph);
 432                 set_new_node(start, pre_call);
 433                 mark_irn_visited(start);
 434
 435                 nomem = get_irg_no_mem(called_graph);
 436                 set_new_node(nomem, get_irg_no_mem(irg));
 437                 mark_irn_visited(nomem);
 438         }
 439
 440         /* entitiy link is used to link entities on old stackframe to the
 441          * new stackframe */
 442         irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
 443
 444         /* copy entities and nodes */
 445         assert(!irn_visited(get_irg_end(called_graph)));
 446         copy_frame_entities(called_graph, irg);
 447         irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
 448                       irg);
 449
 450         irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
 451
 452         /* -- Merge the end of the inlined procedure with the call site -- */
 453         /* We will turn the old Call node into a Tuple with the following
 454            predecessors:
 455            -1:  Block of Tuple.
 456            0: Phi of all Memories of Return statements.
 457            1: Jmp from new Block that merges the control flow from all exception
 458            predecessors of the old end block.
 459            2: Tuple of all arguments.
 460            3: Phi of Exception memories.
 461            In case the old Call directly branches to End on an exception we don't
 462            need the block merging all exceptions nor the Phi of the exception
 463            memories.
 464         */
 465
 466         /* Precompute some values */
 467         end_bl = get_new_node(get_irg_end_block(called_graph));
 468         end    = get_new_node(get_irg_end(called_graph));
 469         arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
 470         n_res  = get_method_n_ress(get_Call_type(call));
 471
 472         res_pred = XMALLOCN(ir_node*, n_res);
 473         cf_pred  = XMALLOCN(ir_node*, arity);
 474
 475         /* archive keepalives */
 476         irn_arity = get_irn_arity(end);
 477         for (i = 0; i < irn_arity; i++) {
 478                 ir_node *ka = get_End_keepalive(end, i);
 479                 if (! is_Bad(ka))
 480                         add_End_keepalive(get_irg_end(irg), ka);
 481         }
 482
 483         /* replace Return nodes by Jump nodes */
 484         n_ret = 0;
 485         for (i = 0; i < arity; i++) {
 486                 ir_node *ret;
 487                 ret = get_Block_cfgpred(end_bl, i);
 488                 if (is_Return(ret)) {
 489                         ir_node *block = get_nodes_block(ret);
 490                         cf_pred[n_ret] = new_r_Jmp(block);
 491                         n_ret++;
 492                 }
 493         }
 494         set_irn_in(post_bl, n_ret, cf_pred);
 495
 496         /* build a Tuple for all results of the method.
 497          * add Phi node if there was more than one Return. */
 498         turn_into_tuple(post_call, pn_Call_max+1);
 499         /* First the Memory-Phi */
 500         n_mem_phi = 0;
 501         for (i = 0; i < arity; i++) {
 502                 ret = get_Block_cfgpred(end_bl, i);
 503                 if (is_Return(ret)) {
 504                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
 505                 }
 506                 /* memory output for some exceptions is directly connected to End */
 507                 if (is_Call(ret)) {
 508                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
 509                 } else if (is_fragile_op(ret)) {
 510                         /* We rely that all cfops have the memory output at the same position. */
 511                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
 512                 } else if (is_Raise(ret)) {
 513                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
 514                 }
 515         }
 516         phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
 517         set_Tuple_pred(call, pn_Call_M, phi);
 518         /* Conserve Phi-list for further inlinings -- but might be optimized */
 519         if (get_nodes_block(phi) == post_bl) {
 520                 set_irn_link(phi, get_irn_link(post_bl));
 521                 set_irn_link(post_bl, phi);
 522         }
 523         /* Now the real results */
 524         if (n_res > 0) {
 525                 ir_node *result_tuple;
 526                 for (j = 0; j < n_res; j++) {
 527                         ir_type *res_type = get_method_res_type(ctp, j);
 528                         ir_mode *res_mode = get_type_mode(res_type);
 529                         n_ret = 0;
 530                         for (i = 0; i < arity; i++) {
 531                                 ret = get_Block_cfgpred(end_bl, i);
 532                                 if (is_Return(ret)) {
 533                                         ir_node *res = get_Return_res(ret, j);
 534                                         if (get_irn_mode(res) != res_mode) {
 535                                                 ir_node *block = get_nodes_block(res);
 536                                                 res = new_r_Conv(block, res, res_mode);
 537                                         }
 538                                         cf_pred[n_ret] = res;
 539                                         n_ret++;
 540                                 }
 541                         }
 542                         if (n_ret > 0) {
 543                                 phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
 544                         } else {
 545                                 phi = new_r_Bad(irg, res_mode);
 546                         }
 547                         res_pred[j] = phi;
 548                         /* Conserve Phi-list for further inlinings -- but might be optimized */
 549                         if (get_nodes_block(phi) == post_bl) {
 550                                 set_Phi_next(phi, get_Block_phis(post_bl));
 551                                 set_Block_phis(post_bl, phi);
 552                         }
 553                 }
 554                 result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
 555                 set_Tuple_pred(call, pn_Call_T_result, result_tuple);
 556         } else {
 557                 set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
 558         }
 559         /* handle the regular call */
 560         set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
 561
 562         /* Finally the exception control flow.
 563            We have two possible situations:
 564            First if the Call branches to an exception handler:
 565            We need to add a Phi node to
 566            collect the memory containing the exception objects.  Further we need
 567            to add another block to get a correct representation of this Phi.  To
 568            this block we add a Jmp that resolves into the X output of the Call
 569            when the Call is turned into a tuple.
 570            Second: There is no exception edge. Just add all inlined exception
 571            branches to the End node.
 572          */
 573         if (exc_handling == exc_handler) {
 574                 n_exc = 0;
 575                 for (i = 0; i < arity; i++) {
 576                         ir_node *ret, *irn;
 577                         ret = get_Block_cfgpred(end_bl, i);
 578                         irn = skip_Proj(ret);
 579                         if (is_fragile_op(irn) || is_Raise(irn)) {
 580                                 cf_pred[n_exc] = ret;
 581                                 ++n_exc;
 582                         }
 583                 }
 584                 if (n_exc > 0) {
 585                         if (n_exc == 1) {
 586                                 /* simple fix */
 587                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
 588                         } else {
 589                                 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
 590                                 set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
 591                         }
 592                 } else {
 593                         set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
 594                 }
 595         } else {
 596                 ir_node *main_end_bl;
 597                 int main_end_bl_arity;
 598                 ir_node **end_preds;
 599
 600                 /* assert(exc_handling == 1 || no exceptions. ) */
 601                 n_exc = 0;
 602                 for (i = 0; i < arity; i++) {
 603                         ir_node *ret = get_Block_cfgpred(end_bl, i);
 604                         ir_node *irn = skip_Proj(ret);
 605
 606                         if (is_fragile_op(irn) || is_Raise(irn)) {
 607                                 cf_pred[n_exc] = ret;
 608                                 n_exc++;
 609                         }
 610                 }
 611                 main_end_bl       = get_irg_end_block(irg);
 612                 main_end_bl_arity = get_irn_arity(main_end_bl);
 613                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
 614
 615                 for (i = 0; i < main_end_bl_arity; ++i)
 616                         end_preds[i] = get_irn_n(main_end_bl, i);
 617                 for (i = 0; i < n_exc; ++i)
 618                         end_preds[main_end_bl_arity + i] = cf_pred[i];
 619                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
 620                 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
 621                 free(end_preds);
 622         }
 623         free(res_pred);
 624         free(cf_pred);
 625
 626         /* --  Turn CSE back on. -- */
 627         set_optimize(rem_opt);
 628         current_ir_graph = rem;
 629
 630         return 1;
 631 }
 632
 633 /********************************************************************/
 634 /* Apply inlining to small methods.                                 */
 635 /********************************************************************/
 636
 637 static struct obstack  temp_obst;
 638
 639 /** Represents a possible inlinable call in a graph. */
 640 typedef struct call_entry {
 641         ir_node    *call;       /**< The Call node. */
 642         ir_graph   *callee;     /**< The callee IR-graph. */
 643         list_head  list;        /**< List head for linking the next one. */
 644         int        loop_depth;  /**< The loop depth of this call. */
 645         int        benefice;    /**< The calculated benefice of this call. */
 646         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
 647         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
 648 } call_entry;
 649
 650 /**
 651  * environment for inlining small irgs
 652  */
 653 typedef struct inline_env_t {
 654         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
 655         list_head      calls; /**< The call entry list. */
 656 } inline_env_t;
 657
 658 /**
 659  * Returns the irg called from a Call node. If the irg is not
 660  * known, NULL is returned.
 661  *
 662  * @param call  the call node
 663  */
 664 static ir_graph *get_call_called_irg(ir_node *call)
 665 {
 666         ir_node *addr;
 667
 668         addr = get_Call_ptr(call);
 669         if (is_SymConst_addr_ent(addr)) {
 670                 ir_entity *ent = get_SymConst_entity(addr);
 671                 /* we don't know which function gets finally bound to a weak symbol */
 672                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
 673                         return NULL;
 674
 675                 return get_entity_irg(ent);
 676         }
 677
 678         return NULL;
 679 }
 680
 681 /**
 682  * Walker: Collect all calls to known graphs inside a graph.
 683  */
 684 static void collect_calls(ir_node *call, void *env)
 685 {
 686         (void) env;
 687         if (is_Call(call)) {
 688                 ir_graph *called_irg = get_call_called_irg(call);
 689
 690                 if (called_irg != NULL) {
 691                         /* The Call node calls a locally defined method.  Remember to inline. */
 692                         inline_env_t *ienv  = (inline_env_t*)env;
 693                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
 694                         entry->call       = call;
 695                         entry->callee     = called_irg;
 696                         entry->loop_depth = 0;
 697                         entry->benefice   = 0;
 698                         entry->local_adr  = 0;
 699                         entry->all_const  = 0;
 700
 701                         list_add_tail(&entry->list, &ienv->calls);
 702                 }
 703         }
 704 }
 705
 706 /**
 707  * Inlines all small methods at call sites where the called address comes
 708  * from a Const node that references the entity representing the called
 709  * method.
 710  * The size argument is a rough measure for the code size of the method:
 711  * Methods where the obstack containing the firm graph is smaller than
 712  * size are inlined.
 713  */
 714 void inline_small_irgs(ir_graph *irg, int size)
 715 {
 716         ir_graph *rem = current_ir_graph;
 717         inline_env_t env;
 718         call_entry *entry;
 719
 720         current_ir_graph = irg;
 721         /* Handle graph state */
 722         assert(get_irg_phase_state(irg) != phase_building);
 723         free_callee_info(irg);
 724
 725         /* Find Call nodes to inline.
 726            (We can not inline during a walk of the graph, as inlining the same
 727            method several times changes the visited flag of the walked graph:
 728            after the first inlining visited of the callee equals visited of
 729            the caller.  With the next inlining both are increased.) */
 730         obstack_init(&env.obst);
 731         INIT_LIST_HEAD(&env.calls);
 732         irg_walk_graph(irg, NULL, collect_calls, &env);
 733
 734         if (! list_empty(&env.calls)) {
 735                 /* There are calls to inline */
 736                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 737                 collect_phiprojs(irg);
 738
 739                 list_for_each_entry(call_entry, entry, &env.calls, list) {
 740                         ir_graph            *callee = entry->callee;
 741                         irg_inline_property prop    = get_irg_inline_property(callee);
 742
 743                         if (prop == irg_inline_forbidden) {
 744                                 continue;
 745                         }
 746
 747                         if (prop >= irg_inline_forced ||
 748                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
 749                                 inline_method(entry->call, callee);
 750                         }
 751                 }
 752                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 753         }
 754         obstack_free(&env.obst, NULL);
 755         current_ir_graph = rem;
 756 }
 757
 758 typedef struct inline_small_irgs_pass_t {
 759         ir_graph_pass_t pass;
 760         int            size;
 761 } inline_small_irgs_pass_t;
 762
 763 /**
 764  * Wrapper to run inline_small_irgs() as a pass.
 765  */
 766 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
 767 {
 768         inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
 769
 770         inline_small_irgs(irg, pass->size);
 771         return 0;
 772 }
 773
 774 /* create a pass for inline_small_irgs() */
 775 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
 776 {
 777         inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
 778
 779         pass->size = size;
 780         return def_graph_pass_constructor(
 781                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
 782 }
 783
 784 /**
 785  * Environment for inlining irgs.
 786  */
 787 typedef struct {
 788         list_head calls;             /**< List of of all call nodes in this graph. */
 789         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
 790         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
 791         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
 792         unsigned  n_nodes_orig;      /**< for statistics */
 793         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
 794         unsigned  n_call_nodes_orig; /**< for statistics */
 795         unsigned  n_callers;         /**< Number of known graphs that call this graphs. */
 796         unsigned  n_callers_orig;    /**< for statistics */
 797         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
 798         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
 799 } inline_irg_env;
 800
 801 /**
 802  * Allocate a new environment for inlining.
 803  */
 804 static inline_irg_env *alloc_inline_irg_env(void)
 805 {
 806         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
 807         INIT_LIST_HEAD(&env->calls);
 808         env->local_weights     = NULL;
 809         env->n_nodes           = -2; /* do not count count Start, End */
 810         env->n_blocks          = -2; /* do not count count Start, End Block */
 811         env->n_nodes_orig      = -2; /* do not count Start, End */
 812         env->n_call_nodes      = 0;
 813         env->n_call_nodes_orig = 0;
 814         env->n_callers         = 0;
 815         env->n_callers_orig    = 0;
 816         env->got_inline        = 0;
 817         env->recursive         = 0;
 818         return env;
 819 }
 820
 821 typedef struct walker_env {
 822         inline_irg_env *x;     /**< the inline environment */
 823         char ignore_runtime;   /**< the ignore runtime flag */
 824         char ignore_callers;   /**< if set, do change callers data */
 825 } wenv_t;
 826
 827 /**
 828  * post-walker: collect all calls in the inline-environment
 829  * of a graph and sum some statistics.
 830  */
 831 static void collect_calls2(ir_node *call, void *ctx)
 832 {
 833         wenv_t         *env = (wenv_t*)ctx;
 834         inline_irg_env *x = env->x;
 835         unsigned        code = get_irn_opcode(call);
 836         ir_graph       *callee;
 837         call_entry     *entry;
 838
 839         /* count meaningful nodes in irg */
 840         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
 841                 if (code != iro_Block) {
 842                         ++x->n_nodes;
 843                         ++x->n_nodes_orig;
 844                 } else {
 845                         ++x->n_blocks;
 846                 }
 847         }
 848
 849         if (code != iro_Call) return;
 850
 851         /* check, if it's a runtime call */
 852         if (env->ignore_runtime) {
 853                 ir_node *symc = get_Call_ptr(call);
 854
 855                 if (is_SymConst_addr_ent(symc)) {
 856                         ir_entity *ent = get_SymConst_entity(symc);
 857
 858                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
 859                                 return;
 860                 }
 861         }
 862
 863         /* collect all call nodes */
 864         ++x->n_call_nodes;
 865         ++x->n_call_nodes_orig;
 866
 867         callee = get_call_called_irg(call);
 868         if (callee != NULL) {
 869                 if (! env->ignore_callers) {
 870                         inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
 871                         /* count all static callers */
 872                         ++callee_env->n_callers;
 873                         ++callee_env->n_callers_orig;
 874                 }
 875                 if (callee == current_ir_graph)
 876                         x->recursive = 1;
 877
 878                 /* link it in the list of possible inlinable entries */
 879                 entry = OALLOC(&temp_obst, call_entry);
 880                 entry->call       = call;
 881                 entry->callee     = callee;
 882                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
 883                 entry->benefice   = 0;
 884                 entry->local_adr  = 0;
 885                 entry->all_const  = 0;
 886
 887                 list_add_tail(&entry->list, &x->calls);
 888         }
 889 }
 890
 891 /**
 892  * Returns TRUE if the number of callers is 0 in the irg's environment,
 893  * hence this irg is a leaf.
 894  */
 895 inline static int is_leaf(ir_graph *irg)
 896 {
 897         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
 898         return env->n_call_nodes == 0;
 899 }
 900
 901 /**
 902  * Returns TRUE if the number of nodes in the callee is
 903  * smaller then size in the irg's environment.
 904  */
 905 inline static int is_smaller(ir_graph *callee, unsigned size)
 906 {
 907         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
 908         return env->n_nodes < size;
 909 }
 910
 911 /**
 912  * Duplicate a call entry.
 913  *
 914  * @param entry     the original entry to duplicate
 915  * @param new_call  the new call node
 916  * @param loop_depth_delta
 917  *                  delta value for the loop depth
 918  */
 919 static call_entry *duplicate_call_entry(const call_entry *entry,
 920                                         ir_node *new_call, int loop_depth_delta)
 921 {
 922         call_entry *nentry = OALLOC(&temp_obst, call_entry);
 923         nentry->call       = new_call;
 924         nentry->callee     = entry->callee;
 925         nentry->benefice   = entry->benefice;
 926         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
 927         nentry->local_adr  = entry->local_adr;
 928         nentry->all_const  = entry->all_const;
 929
 930         return nentry;
 931 }
 932
 933 /**
 934  * Append all call nodes of the source environment to the nodes of in the destination
 935  * environment.
 936  *
 937  * @param dst         destination environment
 938  * @param src         source environment
 939  * @param loop_depth  the loop depth of the call that is replaced by the src list
 940  */
 941 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
 942 {
 943         call_entry *entry, *nentry;
 944
 945         /* Note that the src list points to Call nodes in the inlined graph, but
 946            we need Call nodes in our graph. Luckily the inliner leaves this information
 947            in the link field. */
 948         list_for_each_entry(call_entry, entry, &src->calls, list) {
 949                 nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
 950                 list_add_tail(&nentry->list, &dst->calls);
 951         }
 952         dst->n_call_nodes += src->n_call_nodes;
 953         dst->n_nodes      += src->n_nodes;
 954 }
 955
 956 /*
 957  * Inlines small leaf methods at call sites where the called address comes
 958  * from a Const node that references the entity representing the called
 959  * method.
 960  * The size argument is a rough measure for the code size of the method:
 961  * Methods where the obstack containing the firm graph is smaller than
 962  * size are inlined.
 963  */
 964 void inline_leaf_functions(unsigned maxsize, unsigned leafsize,
 965                             unsigned size, int ignore_runtime)
 966 {
 967         inline_irg_env   *env;
 968         ir_graph         *irg;
 969         size_t           i, n_irgs;
 970         ir_graph         *rem;
 971         int              did_inline;
 972         wenv_t           wenv;
 973         call_entry       *entry, *next;
 974         const call_entry *centry;
 975         pmap             *copied_graphs;
 976         pmap_entry       *pm_entry;
 977
 978         rem = current_ir_graph;
 979         obstack_init(&temp_obst);
 980
 981         /* a map for the copied graphs, used to inline recursive calls */
 982         copied_graphs = pmap_create();
 983
 984         /* extend all irgs by a temporary data structure for inlining. */
 985         n_irgs = get_irp_n_irgs();
 986         for (i = 0; i < n_irgs; ++i)
 987                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
 988
 989         /* Pre-compute information in temporary data structure. */
 990         wenv.ignore_runtime = ignore_runtime;
 991         wenv.ignore_callers = 0;
 992         for (i = 0; i < n_irgs; ++i) {
 993                 ir_graph *irg = get_irp_irg(i);
 994
 995                 assert(get_irg_phase_state(irg) != phase_building);
 996                 free_callee_info(irg);
 997
 998                 assure_loopinfo(irg);
 999                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1000                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1001         }
1002
1003         /* -- and now inline. -- */
1004
1005         /* Inline leafs recursively -- we might construct new leafs. */
1006         do {
1007                 did_inline = 0;
1008
1009                 for (i = 0; i < n_irgs; ++i) {
1010                         ir_node *call;
1011                         int phiproj_computed = 0;
1012
1013                         current_ir_graph = get_irp_irg(i);
1014                         env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1015
1016                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1017                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1018                                 ir_graph            *callee;
1019                                 irg_inline_property  prop;
1020
1021                                 if (env->n_nodes > maxsize)
1022                                         break;
1023
1024                                 call   = entry->call;
1025                                 callee = entry->callee;
1026
1027                                 prop = get_irg_inline_property(callee);
1028                                 if (prop == irg_inline_forbidden) {
1029                                         continue;
1030                                 }
1031
1032                                 if (is_leaf(callee) && (
1033                                     is_smaller(callee, leafsize) || prop >= irg_inline_forced)) {
1034                                         if (!phiproj_computed) {
1035                                                 phiproj_computed = 1;
1036                                                 collect_phiprojs(current_ir_graph);
1037                                         }
1038                                         did_inline = inline_method(call, callee);
1039
1040                                         if (did_inline) {
1041                                                 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1042
1043                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1044                                                 phiproj_computed = 0;
1045
1046                                                 /* Do some statistics */
1047                                                 env->got_inline = 1;
1048                                                 --env->n_call_nodes;
1049                                                 env->n_nodes += callee_env->n_nodes;
1050                                                 --callee_env->n_callers;
1051
1052                                                 /* remove this call from the list */
1053                                                 list_del(&entry->list);
1054                                                 continue;
1055                                         }
1056                                 }
1057                         }
1058                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1059                 }
1060         } while (did_inline);
1061
1062         /* inline other small functions. */
1063         for (i = 0; i < n_irgs; ++i) {
1064                 ir_node *call;
1065                 int phiproj_computed = 0;
1066
1067                 current_ir_graph = get_irp_irg(i);
1068                 env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1069
1070                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1071
1072                 /* note that the list of possible calls is updated during the process */
1073                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1074                         irg_inline_property prop;
1075                         ir_graph            *callee;
1076                         ir_graph            *calleee;
1077
1078                         call   = entry->call;
1079                         callee = entry->callee;
1080
1081                         prop = get_irg_inline_property(callee);
1082                         if (prop == irg_inline_forbidden) {
1083                                 continue;
1084                         }
1085
1086                         calleee = (ir_graph*)pmap_get(copied_graphs, callee);
1087                         if (calleee != NULL) {
1088                                 /*
1089                                  * Remap callee if we have a copy.
1090                                  * FIXME: Should we do this only for recursive Calls ?
1091                                  */
1092                                 callee = calleee;
1093                         }
1094
1095                         if (prop >= irg_inline_forced ||
1096                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1097                                 if (current_ir_graph == callee) {
1098                                         /*
1099                                          * Recursive call: we cannot directly inline because we cannot walk
1100                                          * the graph and change it. So we have to make a copy of the graph
1101                                          * first.
1102                                          */
1103
1104                                         inline_irg_env *callee_env;
1105                                         ir_graph       *copy;
1106
1107                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1108
1109                                         /*
1110                                          * No copy yet, create one.
1111                                          * Note that recursive methods are never leafs, so it is sufficient
1112                                          * to test this condition here.
1113                                          */
1114                                         copy = create_irg_copy(callee);
1115
1116                                         /* create_irg_copy() destroys the Proj links, recompute them */
1117                                         phiproj_computed = 0;
1118
1119                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1120
1121                                         /* allocate new environment */
1122                                         callee_env = alloc_inline_irg_env();
1123                                         set_irg_link(copy, callee_env);
1124
1125                                         assure_loopinfo(copy);
1126                                         wenv.x              = callee_env;
1127                                         wenv.ignore_callers = 1;
1128                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1129
1130                                         /*
1131                                          * Enter the entity of the original graph. This is needed
1132                                          * for inline_method(). However, note that ent->irg still points
1133                                          * to callee, NOT to copy.
1134                                          */
1135                                         set_irg_entity(copy, get_irg_entity(callee));
1136
1137                                         pmap_insert(copied_graphs, callee, copy);
1138                                         callee = copy;
1139
1140                                         /* we have only one caller: the original graph */
1141                                         callee_env->n_callers      = 1;
1142                                         callee_env->n_callers_orig = 1;
1143                                 }
1144                                 if (! phiproj_computed) {
1145                                         phiproj_computed = 1;
1146                                         collect_phiprojs(current_ir_graph);
1147                                 }
1148                                 did_inline = inline_method(call, callee);
1149                                 if (did_inline) {
1150                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1151
1152                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1153                                         phiproj_computed = 0;
1154
1155                                         /* callee was inline. Append its call list. */
1156                                         env->got_inline = 1;
1157                                         --env->n_call_nodes;
1158                                         append_call_list(env, callee_env, entry->loop_depth);
1159                                         --callee_env->n_callers;
1160
1161                                         /* after we have inlined callee, all called methods inside callee
1162                                            are now called once more */
1163                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1164                                                 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1165                                                 ++penv->n_callers;
1166                                         }
1167
1168                                         /* remove this call from the list */
1169                                         list_del(&entry->list);
1170                                         continue;
1171                                 }
1172                         }
1173                 }
1174                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1175         }
1176
1177         for (i = 0; i < n_irgs; ++i) {
1178                 irg = get_irp_irg(i);
1179                 env = (inline_irg_env*)get_irg_link(irg);
1180
1181                 if (env->got_inline) {
1182                         optimize_graph_df(irg);
1183                         optimize_cf(irg);
1184                 }
1185                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1186                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1187                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1188                         env->n_callers_orig, env->n_callers,
1189                         get_entity_name(get_irg_entity(irg))));
1190                 }
1191         }
1192
1193         /* kill the copied graphs: we don't need them anymore */
1194         foreach_pmap(copied_graphs, pm_entry) {
1195                 ir_graph *copy = (ir_graph*)pm_entry->value;
1196
1197                 /* reset the entity, otherwise it will be deleted in the next step ... */
1198                 set_irg_entity(copy, NULL);
1199                 free_ir_graph(copy);
1200         }
1201         pmap_destroy(copied_graphs);
1202
1203         obstack_free(&temp_obst, NULL);
1204         current_ir_graph = rem;
1205 }
1206
1207 typedef struct inline_leaf_functions_pass_t {
1208         ir_prog_pass_t pass;
1209         unsigned       maxsize;
1210         unsigned       leafsize;
1211         unsigned       size;
1212         int            ignore_runtime;
1213 } inline_leaf_functions_pass_t;
1214
1215 /**
1216  * Wrapper to run inline_leaf_functions() as a ir_prog pass.
1217  */
1218 static int inline_leaf_functions_wrapper(ir_prog *irp, void *context)
1219 {
1220         inline_leaf_functions_pass_t *pass = (inline_leaf_functions_pass_t*)context;
1221
1222         (void)irp;
1223         inline_leaf_functions(
1224                 pass->maxsize, pass->leafsize,
1225                 pass->size, pass->ignore_runtime);
1226         return 0;
1227 }
1228
1229 /* create a pass for inline_leaf_functions() */
1230 ir_prog_pass_t *inline_leaf_functions_pass(
1231         const char *name, unsigned maxsize, unsigned leafsize,
1232         unsigned size, int ignore_runtime)
1233 {
1234         inline_leaf_functions_pass_t *pass = XMALLOCZ(inline_leaf_functions_pass_t);
1235
1236         pass->maxsize        = maxsize;
1237         pass->leafsize       = leafsize;
1238         pass->size           = size;
1239         pass->ignore_runtime = ignore_runtime;
1240
1241         return def_prog_pass_constructor(
1242                 &pass->pass,
1243                 name ? name : "inline_leaf_functions",
1244                 inline_leaf_functions_wrapper);
1245 }
1246
1247 /**
1248  * Calculate the parameter weights for transmitting the address of a local variable.
1249  */
1250 static unsigned calc_method_local_weight(ir_node *arg)
1251 {
1252         int      i, j, k;
1253         unsigned v, weight = 0;
1254
1255         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1256                 ir_node *succ = get_irn_out(arg, i);
1257
1258                 switch (get_irn_opcode(succ)) {
1259                 case iro_Load:
1260                 case iro_Store:
1261                         /* Loads and Store can be removed */
1262                         weight += 3;
1263                         break;
1264                 case iro_Sel:
1265                         /* check if all args are constant */
1266                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1267                                 ir_node *idx = get_Sel_index(succ, j);
1268                                 if (! is_Const(idx))
1269                                         return 0;
1270                         }
1271                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1272                            some unsupported node. */
1273                         v = calc_method_local_weight(succ);
1274                         if (v == 0)
1275                                 return 0;
1276                         /* we can kill one Sel with constant indexes, this is cheap */
1277                         weight += v + 1;
1278                         break;
1279                 case iro_Id:
1280                         /* when looking backward we might find Id nodes */
1281                         weight += calc_method_local_weight(succ);
1282                         break;
1283                 case iro_Tuple:
1284                         /* unoptimized tuple */
1285                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1286                                 ir_node *pred = get_Tuple_pred(succ, j);
1287                                 if (pred == arg) {
1288                                         /* look for Proj(j) */
1289                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1290                                                 ir_node *succ_succ = get_irn_out(succ, k);
1291                                                 if (is_Proj(succ_succ)) {
1292                                                         if (get_Proj_proj(succ_succ) == j) {
1293                                                                 /* found */
1294                                                                 weight += calc_method_local_weight(succ_succ);
1295                                                         }
1296                                                 } else {
1297                                                         /* this should NOT happen */
1298                                                         return 0;
1299                                                 }
1300                                         }
1301                                 }
1302                         }
1303                         break;
1304                 default:
1305                         /* any other node: unsupported yet or bad. */
1306                         return 0;
1307                 }
1308         }
1309         return weight;
1310 }
1311
1312 /**
1313  * Calculate the parameter weights for transmitting the address of a local variable.
1314  */
1315 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1316 {
1317         ir_entity *ent = get_irg_entity(irg);
1318         ir_type  *mtp;
1319         size_t   nparams;
1320         int      i;
1321         long     proj_nr;
1322         ir_node  *irg_args, *arg;
1323
1324         mtp      = get_entity_type(ent);
1325         nparams  = get_method_n_params(mtp);
1326
1327         /* allocate a new array. currently used as 'analysed' flag */
1328         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1329
1330         /* If the method haven't parameters we have nothing to do. */
1331         if (nparams <= 0)
1332                 return;
1333
1334         assure_irg_outs(irg);
1335         irg_args = get_irg_args(irg);
1336         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1337                 arg     = get_irn_out(irg_args, i);
1338                 proj_nr = get_Proj_proj(arg);
1339                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1340         }
1341 }
1342
1343 /**
1344  * Calculate the benefice for transmitting an local variable address.
1345  * After inlining, the local variable might be transformed into a
1346  * SSA variable by scalar_replacement().
1347  */
1348 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1349 {
1350         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1351
1352         if (env->local_weights == NULL)
1353                 analyze_irg_local_weights(env, callee);
1354
1355         if (pos < ARR_LEN(env->local_weights))
1356                 return env->local_weights[pos];
1357         return 0;
1358 }
1359
1360 /**
1361  * Calculate a benefice value for inlining the given call.
1362  *
1363  * @param call       the call node we have to inspect
1364  * @param callee     the called graph
1365  */
1366 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1367 {
1368         ir_node   *call = entry->call;
1369         ir_entity *ent  = get_irg_entity(callee);
1370         ir_type   *callee_frame;
1371         size_t    i, n_members, n_params;
1372         ir_node   *frame_ptr;
1373         ir_type   *mtp;
1374         int       weight = 0;
1375         int       all_const;
1376         unsigned  cc, v;
1377         irg_inline_property prop;
1378
1379         inline_irg_env *callee_env;
1380
1381         prop = get_irg_inline_property(callee);
1382         if (prop == irg_inline_forbidden) {
1383                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1384                     call, callee));
1385                 return entry->benefice = INT_MIN;
1386         }
1387
1388         callee_frame = get_irg_frame_type(callee);
1389         n_members = get_class_n_members(callee_frame);
1390         for (i = 0; i < n_members; ++i) {
1391                 ir_entity *frame_ent = get_class_member(callee_frame, i);
1392                 if (is_parameter_entity(frame_ent)) {
1393                         // TODO inliner should handle parameter entities by inserting Store operations
1394                         DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
1395                         set_irg_inline_property(callee, irg_inline_forbidden);
1396                         return entry->benefice = INT_MIN;
1397                 }
1398         }
1399
1400         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1401                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1402                     call, callee));
1403                 return entry->benefice = INT_MIN;
1404         }
1405
1406         /* costs for every passed parameter */
1407         n_params = get_Call_n_params(call);
1408         mtp      = get_entity_type(ent);
1409         cc       = get_method_calling_convention(mtp);
1410         if (cc & cc_reg_param) {
1411                 /* register parameter, smaller costs for register parameters */
1412                 size_t max_regs = cc & ~cc_bits;
1413
1414                 if (max_regs < n_params)
1415                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1416                 else
1417                         weight += n_params * 2;
1418         } else {
1419                 /* parameters are passed an stack */
1420                 weight += 5 * n_params;
1421         }
1422
1423         /* constant parameters improve the benefice */
1424         frame_ptr = get_irg_frame(current_ir_graph);
1425         all_const = 1;
1426         for (i = 0; i < n_params; ++i) {
1427                 ir_node *param = get_Call_param(call, i);
1428
1429                 if (is_Const(param)) {
1430                         weight += get_method_param_weight(ent, i);
1431                 } else {
1432                         all_const = 0;
1433                         if (is_SymConst(param))
1434                                 weight += get_method_param_weight(ent, i);
1435                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1436                                 /*
1437                                  * An address of a local variable is transmitted. After
1438                                  * inlining, scalar_replacement might be able to remove the
1439                                  * local variable, so honor this.
1440                                  */
1441                                 v = get_method_local_adress_weight(callee, i);
1442                                 weight += v;
1443                                 if (v > 0)
1444                                         entry->local_adr = 1;
1445                         }
1446                 }
1447         }
1448         entry->all_const = all_const;
1449
1450         callee_env = (inline_irg_env*)get_irg_link(callee);
1451         if (callee_env->n_callers == 1 &&
1452             callee != current_ir_graph &&
1453             !entity_is_externally_visible(ent)) {
1454                 weight += 700;
1455         }
1456
1457         /* give a bonus for functions with one block */
1458         if (callee_env->n_blocks == 1)
1459                 weight = weight * 3 / 2;
1460
1461         /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
1462         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1463                 weight += 2000;
1464
1465         /* and finally for leafs: they do not increase the register pressure
1466            because of callee safe registers */
1467         if (callee_env->n_call_nodes == 0)
1468                 weight += 400;
1469
1470         /** it's important to inline inner loops first */
1471         if (entry->loop_depth > 30)
1472                 weight += 30 * 1024;
1473         else
1474                 weight += entry->loop_depth * 1024;
1475
1476         /*
1477          * All arguments constant is probably a good sign, give an extra bonus
1478          */
1479         if (all_const)
1480                 weight += 1024;
1481
1482         return entry->benefice = weight;
1483 }
1484
1485 typedef struct walk_env_t {
1486         ir_graph **irgs;
1487         size_t   last_irg;
1488 } walk_env_t;
1489
1490 /**
1491  * Callgraph walker, collect all visited graphs.
1492  */
1493 static void callgraph_walker(ir_graph *irg, void *data)
1494 {
1495         walk_env_t *env = (walk_env_t *)data;
1496         env->irgs[env->last_irg++] = irg;
1497 }
1498
1499 /**
1500  * Creates an inline order for all graphs.
1501  *
1502  * @return the list of graphs.
1503  */
1504 static ir_graph **create_irg_list(void)
1505 {
1506         ir_entity  **free_methods;
1507         size_t     n_irgs = get_irp_n_irgs();
1508         walk_env_t env;
1509
1510         cgana(&free_methods);
1511         xfree(free_methods);
1512
1513         compute_callgraph();
1514
1515         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1516         env.last_irg = 0;
1517
1518         callgraph_walk(NULL, callgraph_walker, &env);
1519         assert(n_irgs == env.last_irg);
1520
1521         free_callgraph();
1522
1523         return env.irgs;
1524 }
1525
1526 /**
1527  * Push a call onto the priority list if its benefice is big enough.
1528  *
1529  * @param pqueue   the priority queue of calls
1530  * @param call     the call entry
1531  * @param inlien_threshold
1532  *                 the threshold value
1533  */
1534 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1535                             int inline_threshold)
1536 {
1537         ir_graph            *callee  = call->callee;
1538         irg_inline_property prop     = get_irg_inline_property(callee);
1539         int                 benefice = calc_inline_benefice(call, callee);
1540
1541         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1542             get_irn_irg(call->call), call->call, callee, benefice));
1543
1544         if (prop < irg_inline_forced && benefice < inline_threshold) {
1545                 return;
1546         }
1547
1548         pqueue_put(pqueue, call, benefice);
1549 }
1550
1551 /**
1552  * Try to inline calls into a graph.
1553  *
1554  * @param irg      the graph into which we inline
1555  * @param maxsize  do NOT inline if the size of irg gets
1556  *                 bigger than this amount
1557  * @param inline_threshold
1558  *                 threshold value for inline decision
1559  * @param copied_graphs
1560  *                 map containing copied of recursive graphs
1561  */
1562 static void inline_into(ir_graph *irg, unsigned maxsize,
1563                         int inline_threshold, pmap *copied_graphs)
1564 {
1565         int            phiproj_computed = 0;
1566         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1567         call_entry     *curr_call;
1568         wenv_t         wenv;
1569         pqueue_t       *pqueue;
1570
1571         if (env->n_call_nodes == 0)
1572                 return;
1573
1574         if (env->n_nodes > maxsize) {
1575                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1576                 return;
1577         }
1578
1579         current_ir_graph = irg;
1580         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1581
1582         /* put irgs into the pqueue */
1583         pqueue = new_pqueue();
1584
1585         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1586                 assert(is_Call(curr_call->call));
1587                 maybe_push_call(pqueue, curr_call, inline_threshold);
1588         }
1589
1590         /* note that the list of possible calls is updated during the process */
1591         while (!pqueue_empty(pqueue)) {
1592                 int                 did_inline;
1593                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1594                 ir_graph            *callee     = curr_call->callee;
1595                 ir_node             *call_node  = curr_call->call;
1596                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1597                 irg_inline_property prop        = get_irg_inline_property(callee);
1598                 ir_graph            *calleee;
1599                 int                 loop_depth;
1600                 const call_entry    *centry;
1601
1602                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1603                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1604                                                 env->n_nodes, callee, callee_env->n_nodes));
1605                         continue;
1606                 }
1607
1608                 calleee = (ir_graph*)pmap_get(copied_graphs, callee);
1609                 if (calleee != NULL) {
1610                         int benefice = curr_call->benefice;
1611                         /*
1612                          * Reduce the weight for recursive function IFF not all arguments are const.
1613                          * inlining recursive functions is rarely good.
1614                          */
1615                         if (!curr_call->all_const)
1616                                 benefice -= 2000;
1617                         if (benefice < inline_threshold)
1618                                 continue;
1619
1620                         /*
1621                          * Remap callee if we have a copy.
1622                          */
1623                         callee     = calleee;
1624                         callee_env = (inline_irg_env*)get_irg_link(callee);
1625                 }
1626
1627                 if (current_ir_graph == callee) {
1628                         /*
1629                          * Recursive call: we cannot directly inline because we cannot
1630                          * walk the graph and change it. So we have to make a copy of
1631                          * the graph first.
1632                          */
1633                         int benefice = curr_call->benefice;
1634                         ir_graph *copy;
1635
1636                         /*
1637                          * Reduce the weight for recursive function IFF not all arguments are const.
1638                          * inlining recursive functions is rarely good.
1639                          */
1640                         if (!curr_call->all_const)
1641                                 benefice -= 2000;
1642                         if (benefice < inline_threshold)
1643                                 continue;
1644
1645                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1646
1647                         /*
1648                          * No copy yet, create one.
1649                          * Note that recursive methods are never leafs, so it is
1650                          * sufficient to test this condition here.
1651                          */
1652                         copy = create_irg_copy(callee);
1653
1654                         /* create_irg_copy() destroys the Proj links, recompute them */
1655                         phiproj_computed = 0;
1656
1657                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1658
1659                         /* allocate a new environment */
1660                         callee_env = alloc_inline_irg_env();
1661                         set_irg_link(copy, callee_env);
1662
1663                         assure_loopinfo(copy);
1664                         memset(&wenv, 0, sizeof(wenv));
1665                         wenv.x              = callee_env;
1666                         wenv.ignore_callers = 1;
1667                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1668
1669                         /*
1670                          * Enter the entity of the original graph. This is needed
1671                          * for inline_method(). However, note that ent->irg still points
1672                          * to callee, NOT to copy.
1673                          */
1674                         set_irg_entity(copy, get_irg_entity(callee));
1675
1676                         pmap_insert(copied_graphs, callee, copy);
1677                         callee = copy;
1678
1679                         /* we have only one caller: the original graph */
1680                         callee_env->n_callers      = 1;
1681                         callee_env->n_callers_orig = 1;
1682                 }
1683                 if (! phiproj_computed) {
1684                         phiproj_computed = 1;
1685                         collect_phiprojs(current_ir_graph);
1686                 }
1687                 did_inline = inline_method(call_node, callee);
1688                 if (!did_inline)
1689                         continue;
1690
1691                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1692                 phiproj_computed = 0;
1693
1694                 /* remove it from the caller list */
1695                 list_del(&curr_call->list);
1696
1697                 /* callee was inline. Append its call list. */
1698                 env->got_inline = 1;
1699                 --env->n_call_nodes;
1700
1701                 /* we just generate a bunch of new calls */
1702                 loop_depth = curr_call->loop_depth;
1703                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1704                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1705                         ir_node        *new_call;
1706                         call_entry     *new_entry;
1707
1708                         /* after we have inlined callee, all called methods inside
1709                          * callee are now called once more */
1710                         ++penv->n_callers;
1711
1712                         /* Note that the src list points to Call nodes in the inlined graph,
1713                          * but we need Call nodes in our graph. Luckily the inliner leaves
1714                          * this information in the link field. */
1715                         new_call = (ir_node*)get_irn_link(centry->call);
1716                         if (get_irn_irg(new_call) != irg) {
1717                                 /* centry->call has not been copied, which means it is dead.
1718                                  * This might happen during inlining, if a const function,
1719                                  * which cannot be inlined is only used as an unused argument
1720                                  * of another function, which is inlined. */
1721                                 continue;
1722                         }
1723                         assert(is_Call(new_call));
1724
1725                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1726                         list_add_tail(&new_entry->list, &env->calls);
1727                         maybe_push_call(pqueue, new_entry, inline_threshold);
1728                 }
1729
1730                 env->n_call_nodes += callee_env->n_call_nodes;
1731                 env->n_nodes += callee_env->n_nodes;
1732                 --callee_env->n_callers;
1733         }
1734         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1735         del_pqueue(pqueue);
1736 }
1737
1738 /*
1739  * Heuristic inliner. Calculates a benefice value for every call and inlines
1740  * those calls with a value higher than the threshold.
1741  */
1742 void inline_functions(unsigned maxsize, int inline_threshold,
1743                       opt_ptr after_inline_opt)
1744 {
1745         inline_irg_env   *env;
1746         size_t           i, n_irgs;
1747         ir_graph         *rem;
1748         wenv_t           wenv;
1749         pmap             *copied_graphs;
1750         pmap_entry       *pm_entry;
1751         ir_graph         **irgs;
1752
1753         rem = current_ir_graph;
1754         obstack_init(&temp_obst);
1755
1756         irgs = create_irg_list();
1757
1758         /* a map for the copied graphs, used to inline recursive calls */
1759         copied_graphs = pmap_create();
1760
1761         /* extend all irgs by a temporary data structure for inlining. */
1762         n_irgs = get_irp_n_irgs();
1763         for (i = 0; i < n_irgs; ++i)
1764                 set_irg_link(irgs[i], alloc_inline_irg_env());
1765
1766         /* Pre-compute information in temporary data structure. */
1767         wenv.ignore_runtime = 0;
1768         wenv.ignore_callers = 0;
1769         for (i = 0; i < n_irgs; ++i) {
1770                 ir_graph *irg = irgs[i];
1771
1772                 free_callee_info(irg);
1773
1774                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1775                 assure_loopinfo(irg);
1776                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1777         }
1778
1779         /* -- and now inline. -- */
1780         for (i = 0; i < n_irgs; ++i) {
1781                 ir_graph *irg = irgs[i];
1782
1783                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1784         }
1785
1786         for (i = 0; i < n_irgs; ++i) {
1787                 ir_graph *irg = irgs[i];
1788
1789                 env = (inline_irg_env*)get_irg_link(irg);
1790                 if (env->got_inline && after_inline_opt != NULL) {
1791                         /* this irg got calls inlined: optimize it */
1792                         after_inline_opt(irg);
1793                 }
1794                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1795                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1796                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1797                         env->n_callers_orig, env->n_callers,
1798                         get_entity_name(get_irg_entity(irg))));
1799                 }
1800         }
1801
1802         /* kill the copied graphs: we don't need them anymore */
1803         foreach_pmap(copied_graphs, pm_entry) {
1804                 ir_graph *copy = (ir_graph*)pm_entry->value;
1805
1806                 /* reset the entity, otherwise it will be deleted in the next step ... */
1807                 set_irg_entity(copy, NULL);
1808                 free_ir_graph(copy);
1809         }
1810         pmap_destroy(copied_graphs);
1811
1812         xfree(irgs);
1813
1814         obstack_free(&temp_obst, NULL);
1815         current_ir_graph = rem;
1816 }
1817
1818 typedef struct inline_functions_pass_t {
1819         ir_prog_pass_t pass;
1820         unsigned       maxsize;
1821         int            inline_threshold;
1822         opt_ptr        after_inline_opt;
1823 } inline_functions_pass_t;
1824
1825 /**
1826  * Wrapper to run inline_functions() as a ir_prog pass.
1827  */
1828 static int inline_functions_wrapper(ir_prog *irp, void *context)
1829 {
1830         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1831
1832         (void)irp;
1833         inline_functions(pass->maxsize, pass->inline_threshold,
1834                          pass->after_inline_opt);
1835         return 0;
1836 }
1837
1838 /* create a ir_prog pass for inline_functions */
1839 ir_prog_pass_t *inline_functions_pass(
1840           const char *name, unsigned maxsize, int inline_threshold,
1841           opt_ptr after_inline_opt)
1842 {
1843         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1844
1845         pass->maxsize          = maxsize;
1846         pass->inline_threshold = inline_threshold;
1847         pass->after_inline_opt = after_inline_opt;
1848
1849         return def_prog_pass_constructor(
1850                 &pass->pass, name ? name : "inline_functions",
1851                 inline_functions_wrapper);
1852 }
1853
1854 void firm_init_inline(void)
1855 {
1856         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1857 }