nsz Git - libfirm/blob - ir/opt/opt_inline.c

   1 /*
   2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief    Dead node elimination and Procedure Inlining.
  23  * @author   Michael Beck, Goetz Lindenmaier
  24  * @version  $Id$
  25  */
  26 #include "config.h"
  27
  28 #include <limits.h>
  29 #include <stdbool.h>
  30 #include <assert.h>
  31
  32 #include "irnode_t.h"
  33 #include "irgraph_t.h"
  34 #include "irprog_t.h"
  35
  36 #include "iroptimize.h"
  37 #include "ircons_t.h"
  38 #include "iropt_t.h"
  39 #include "irgopt.h"
  40 #include "irgmod.h"
  41 #include "irgwalk.h"
  42
  43 #include "array_t.h"
  44 #include "list.h"
  45 #include "pset.h"
  46 #include "pmap.h"
  47 #include "pdeq.h"
  48 #include "xmalloc.h"
  49 #include "pqueue.h"
  50
  51 #include "irouts.h"
  52 #include "irloop_t.h"
  53 #include "irbackedge_t.h"
  54 #include "opt_init.h"
  55 #include "cgana.h"
  56 #include "trouts.h"
  57 #include "error.h"
  58
  59 #include "analyze_irg_args.h"
  60 #include "iredges_t.h"
  61 #include "irflag_t.h"
  62 #include "irhooks.h"
  63 #include "irtools.h"
  64 #include "iropt_dbg.h"
  65 #include "irpass_t.h"
  66 #include "irphase_t.h"
  67
  68 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  69
  70 /*------------------------------------------------------------------*/
  71 /* Routines for dead node elimination / copying garbage collection  */
  72 /* of the obstack.                                                  */
  73 /*------------------------------------------------------------------*/
  74
  75 /**
  76  * Remember the new node in the old node by using a field all nodes have.
  77  */
  78 static void set_new_node(ir_node *node, ir_node *new_node)
  79 {
  80         set_irn_link(node, new_node);
  81 }
  82
  83 /**
  84  * Get this new node, before the old node is forgotten.
  85  */
  86 static inline ir_node *get_new_node(ir_node *old_node)
  87 {
  88         assert(irn_visited(old_node));
  89         return (ir_node*) get_irn_link(old_node);
  90 }
  91
  92 /*--------------------------------------------------------------------*/
  93 /*  Functionality for inlining                                         */
  94 /*--------------------------------------------------------------------*/
  95
  96 /**
  97  * Copy node for inlineing.  Updates attributes that change when
  98  * inlineing but not for dead node elimination.
  99  *
 100  * Copies the node by calling copy_node() and then updates the entity if
 101  * it's a local one.  env must be a pointer of the frame type of the
 102  * inlined procedure. The new entities must be in the link field of
 103  * the entities.
 104  */
 105 static void copy_node_inline(ir_node *node, void *env)
 106 {
 107         ir_graph *new_irg  = (ir_graph*) env;
 108         ir_node  *new_node = irn_copy_into_irg(node, new_irg);
 109
 110         set_new_node(node, new_node);
 111         if (is_Sel(node)) {
 112                 ir_graph  *old_irg        = get_irn_irg(node);
 113                 ir_type   *old_frame_type = get_irg_frame_type(old_irg);
 114                 ir_entity *old_entity     = get_Sel_entity(node);
 115                 assert(is_Sel(new_node));
 116                 /* use copied entities from the new frame */
 117                 if (get_entity_owner(old_entity) == old_frame_type) {
 118                         ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
 119                         assert(new_entity != NULL);
 120                         set_Sel_entity(new_node, new_entity);
 121                 }
 122         } else if (is_Block(new_node)) {
 123                 new_node->attr.block.irg.irg = new_irg;
 124         }
 125 }
 126
 127 static void set_preds_inline(ir_node *node, void *env)
 128 {
 129         ir_node *new_node;
 130
 131         irn_rewire_inputs(node);
 132
 133         /* move constants into start block */
 134         new_node = get_new_node(node);
 135         if (is_irn_constlike(new_node)) {
 136                 ir_graph *new_irg     = (ir_graph *) env;
 137                 ir_node  *start_block = get_irg_start_block(new_irg);
 138                 set_nodes_block(new_node, start_block);
 139         }
 140 }
 141
 142 /**
 143  * Walker: checks if P_value_arg_base is used.
 144  */
 145 static void find_addr(ir_node *node, void *env)
 146 {
 147         bool *allow_inline = (bool*)env;
 148
 149         if (is_Sel(node)) {
 150                 ir_graph *irg = current_ir_graph;
 151                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
 152                         /* access to frame */
 153                         ir_entity *ent = get_Sel_entity(node);
 154                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
 155                                 /* access to value_type */
 156                                 *allow_inline = false;
 157                         }
 158                 }
 159         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
 160                 /* From GCC:
 161                  * Refuse to inline alloca call unless user explicitly forced so as this
 162                  * may change program's memory overhead drastically when the function
 163                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
 164                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
 165                  *
 166                  * Sorrily this is true with our implementation also.
 167                  * Moreover, we cannot differentiate between alloca() and VLA yet, so
 168                  * this disables inlining of functions using VLA (which are completely
 169                  * save).
 170                  *
 171                  * 2 Solutions:
 172                  * - add a flag to the Alloc node for "real" alloca() calls
 173                  * - add a new Stack-Restore node at the end of a function using
 174                  *   alloca()
 175                  */
 176                 *allow_inline = false;
 177         }
 178 }
 179
 180 /**
 181  * Check if we can inline a given call.
 182  * Currently, we cannot inline two cases:
 183  * - call with compound arguments
 184  * - graphs that take the address of a parameter
 185  *
 186  * check these conditions here
 187  */
 188 static bool can_inline(ir_node *call, ir_graph *called_graph)
 189 {
 190         ir_entity          *called      = get_irg_entity(called_graph);
 191         ir_type            *called_type = get_entity_type(called);
 192         ir_type            *call_type   = get_Call_type(call);
 193         size_t              n_params    = get_method_n_params(called_type);
 194         size_t              n_arguments = get_method_n_params(call_type);
 195         size_t              n_res       = get_method_n_ress(called_type);
 196         irg_inline_property prop        = get_irg_inline_property(called_graph);
 197         size_t              i;
 198         bool                res;
 199
 200         if (prop == irg_inline_forbidden)
 201                 return false;
 202
 203         if (n_arguments != n_params) {
 204                 /* this is a bad feature of C: without a prototype, we can
 205                  * call a function with less parameters than needed. Currently
 206                  * we don't support this, although we could use Unknown than. */
 207                 return false;
 208         }
 209         if (n_res != get_method_n_ress(call_type)) {
 210                 return false;
 211         }
 212
 213         /* Argh, compiling C has some bad consequences:
 214          * It is implementation dependent what happens in that case.
 215          * We support inlining, if the bitsize of the types matches AND
 216          * the same arithmetic is used. */
 217         for (i = 0; i < n_params; ++i) {
 218                 ir_type *param_tp = get_method_param_type(called_type, i);
 219                 ir_type *arg_tp   = get_method_param_type(call_type, i);
 220
 221                 if (param_tp != arg_tp) {
 222                         ir_mode *pmode = get_type_mode(param_tp);
 223                         ir_mode *amode = get_type_mode(arg_tp);
 224
 225                         if (pmode == NULL || amode == NULL)
 226                                 return false;
 227                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
 228                                 return false;
 229                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
 230                                 return false;
 231                         /* otherwise we can simply "reinterpret" the bits */
 232                 }
 233         }
 234         for (i = 0; i < n_res; ++i) {
 235                 ir_type *decl_res_tp = get_method_res_type(called_type, i);
 236                 ir_type *used_res_tp = get_method_res_type(call_type, i);
 237
 238                 if (decl_res_tp != used_res_tp) {
 239                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
 240                         ir_mode *used_mode = get_type_mode(used_res_tp);
 241                         if (decl_mode == NULL || used_mode == NULL)
 242                                 return false;
 243                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
 244                                 return false;
 245                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
 246                                 return false;
 247                         /* otherwise we can "reinterpret" the bits */
 248                 }
 249         }
 250
 251         /* check parameters for compound arguments */
 252         for (i = 0; i < n_params; ++i) {
 253                 ir_type *p_type = get_method_param_type(call_type, i);
 254
 255                 if (is_compound_type(p_type))
 256                         return false;
 257         }
 258
 259         /* check results for compound arguments */
 260         for (i = 0; i < n_res; ++i) {
 261                 ir_type *r_type = get_method_res_type(call_type, i);
 262
 263                 if (is_compound_type(r_type))
 264                         return false;
 265         }
 266
 267         res = true;
 268         irg_walk_graph(called_graph, find_addr, NULL, &res);
 269
 270         return res;
 271 }
 272
 273 enum exc_mode {
 274         exc_handler,    /**< There is a handler. */
 275         exc_no_handler  /**< Exception handling not represented. */
 276 };
 277
 278 /**
 279  * copy all entities on the stack frame on 1 irg to the stackframe of another.
 280  * Sets entity links of the old entities to the copies
 281  */
 282 static void copy_frame_entities(ir_graph *from, ir_graph *to)
 283 {
 284         ir_type *from_frame = get_irg_frame_type(from);
 285         ir_type *to_frame   = get_irg_frame_type(to);
 286         size_t   n_members  = get_class_n_members(from_frame);
 287         size_t   i;
 288         assert(from_frame != to_frame);
 289
 290         for (i = 0; i < n_members; ++i) {
 291                 ir_entity *old_ent = get_class_member(from_frame, i);
 292                 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
 293                 set_entity_link(old_ent, new_ent);
 294         }
 295 }
 296
 297 /* Inlines a method at the given call site. */
 298 int inline_method(ir_node *call, ir_graph *called_graph)
 299 {
 300         ir_node       *pre_call;
 301         ir_node       *post_call, *post_bl;
 302         ir_node       *in[pn_Start_max];
 303         ir_node       *end, *end_bl, *block;
 304         ir_node       **res_pred;
 305         ir_node       **cf_pred;
 306         ir_node       **args_in;
 307         ir_node       *ret, *phi;
 308         int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
 309         int           irn_arity, n_params;
 310         int           n_mem_phi;
 311         enum exc_mode exc_handling;
 312         ir_type       *mtp;
 313         ir_type       *ctp;
 314         ir_entity     *ent;
 315         ir_graph      *rem;
 316         ir_graph      *irg = get_irn_irg(call);
 317
 318         /* we cannot inline some types of calls */
 319         if (! can_inline(call, called_graph))
 320                 return 0;
 321
 322         /* We cannot inline a recursive call. The graph must be copied before
 323          * the call the inline_method() using create_irg_copy(). */
 324         if (called_graph == irg)
 325                 return 0;
 326
 327         ent      = get_irg_entity(called_graph);
 328         mtp      = get_entity_type(ent);
 329         ctp      = get_Call_type(call);
 330         n_params = get_method_n_params(mtp);
 331         n_res    = get_method_n_ress(mtp);
 332
 333         rem = current_ir_graph;
 334         current_ir_graph = irg;
 335
 336         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
 337
 338         /* optimizations can cause problems when allocating new nodes */
 339         rem_opt = get_opt_optimize();
 340         set_optimize(0);
 341
 342         /* Handle graph state */
 343         assert(get_irg_phase_state(irg) != phase_building);
 344         assert(get_irg_pinned(irg) == op_pin_state_pinned);
 345         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
 346         set_irg_outs_inconsistent(irg);
 347         set_irg_extblk_inconsistent(irg);
 348         set_irg_doms_inconsistent(irg);
 349         set_irg_loopinfo_inconsistent(irg);
 350         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
 351         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
 352         edges_deactivate(irg);
 353
 354         /* here we know we WILL inline, so inform the statistics */
 355         hook_inline(call, called_graph);
 356
 357         /* -- Decide how to handle exception control flow: Is there a handler
 358            for the Call node, or do we branch directly to End on an exception?
 359            exc_handling:
 360            0 There is a handler.
 361            2 Exception handling not represented in Firm. -- */
 362         {
 363                 ir_node *Xproj = NULL;
 364                 ir_node *proj;
 365                 for (proj = (ir_node*)get_irn_link(call); proj != NULL;
 366                      proj = (ir_node*)get_irn_link(proj)) {
 367                         long proj_nr = get_Proj_proj(proj);
 368                         if (proj_nr == pn_Call_X_except) Xproj = proj;
 369                 }
 370                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
 371         }
 372
 373         /* create the argument tuple */
 374         args_in = ALLOCAN(ir_node*, n_params);
 375
 376         block = get_nodes_block(call);
 377         for (i = n_params - 1; i >= 0; --i) {
 378                 ir_node *arg      = get_Call_param(call, i);
 379                 ir_type *param_tp = get_method_param_type(mtp, i);
 380                 ir_mode *mode     = get_type_mode(param_tp);
 381
 382                 if (mode != get_irn_mode(arg)) {
 383                         arg = new_r_Conv(block, arg, mode);
 384                 }
 385                 args_in[i] = arg;
 386         }
 387
 388         /* the procedure and later replaces the Start node of the called graph.
 389          * Post_call is the old Call node and collects the results of the called
 390          * graph. Both will end up being a tuple. */
 391         post_bl = get_nodes_block(call);
 392         /* XxMxPxPxPxT of Start + parameter of Call */
 393         in[pn_Start_M]              = get_Call_mem(call);
 394         in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
 395         in[pn_Start_P_frame_base]   = get_irg_frame(irg);
 396         in[pn_Start_P_tls]          = get_irg_tls(irg);
 397         in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
 398         pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
 399         post_call = call;
 400
 401         /* --
 402            The new block gets the ins of the old block, pre_call and all its
 403            predecessors and all Phi nodes. -- */
 404         part_block(pre_call);
 405
 406         /* increment visited flag for later walk */
 407         inc_irg_visited(called_graph);
 408
 409         /* link some nodes to nodes in the current graph so instead of copying
 410          * the linked nodes will get used.
 411          * So the copier will use the created Tuple instead of copying the start
 412          * node, similar for singleton nodes like NoMem and Bad.
 413          * Note: this will prohibit predecessors to be copied - only do it for
 414          *       nodes without predecessors */
 415         {
 416                 ir_node *start_block;
 417                 ir_node *start;
 418                 ir_node *bad;
 419                 ir_node *nomem;
 420
 421                 start_block = get_irg_start_block(called_graph);
 422                 set_new_node(start_block, get_nodes_block(pre_call));
 423                 mark_irn_visited(start_block);
 424
 425                 start = get_irg_start(called_graph);
 426                 set_new_node(start, pre_call);
 427                 mark_irn_visited(start);
 428
 429                 bad = get_irg_bad(called_graph);
 430                 set_new_node(bad, get_irg_bad(irg));
 431                 mark_irn_visited(bad);
 432
 433                 nomem = get_irg_no_mem(called_graph);
 434                 set_new_node(nomem, get_irg_no_mem(irg));
 435                 mark_irn_visited(nomem);
 436         }
 437
 438         /* entitiy link is used to link entities on old stackframe to the
 439          * new stackframe */
 440         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
 441
 442         /* copy entities and nodes */
 443         assert(!irn_visited(get_irg_end(called_graph)));
 444         copy_frame_entities(called_graph, irg);
 445         irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
 446                       irg);
 447
 448         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
 449
 450         /* -- Merge the end of the inlined procedure with the call site -- */
 451         /* We will turn the old Call node into a Tuple with the following
 452            predecessors:
 453            -1:  Block of Tuple.
 454            0: Phi of all Memories of Return statements.
 455            1: Jmp from new Block that merges the control flow from all exception
 456            predecessors of the old end block.
 457            2: Tuple of all arguments.
 458            3: Phi of Exception memories.
 459            In case the old Call directly branches to End on an exception we don't
 460            need the block merging all exceptions nor the Phi of the exception
 461            memories.
 462         */
 463
 464         /* Precompute some values */
 465         end_bl = get_new_node(get_irg_end_block(called_graph));
 466         end    = get_new_node(get_irg_end(called_graph));
 467         arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
 468         n_res  = get_method_n_ress(get_Call_type(call));
 469
 470         res_pred = XMALLOCN(ir_node*, n_res);
 471         cf_pred  = XMALLOCN(ir_node*, arity);
 472
 473         /* archive keepalives */
 474         irn_arity = get_irn_arity(end);
 475         for (i = 0; i < irn_arity; i++) {
 476                 ir_node *ka = get_End_keepalive(end, i);
 477                 if (! is_Bad(ka))
 478                         add_End_keepalive(get_irg_end(irg), ka);
 479         }
 480
 481         /* replace Return nodes by Jump nodes */
 482         n_ret = 0;
 483         for (i = 0; i < arity; i++) {
 484                 ir_node *ret;
 485                 ret = get_Block_cfgpred(end_bl, i);
 486                 if (is_Return(ret)) {
 487                         ir_node *block = get_nodes_block(ret);
 488                         cf_pred[n_ret] = new_r_Jmp(block);
 489                         n_ret++;
 490                 }
 491         }
 492         set_irn_in(post_bl, n_ret, cf_pred);
 493
 494         /* build a Tuple for all results of the method.
 495          * add Phi node if there was more than one Return. */
 496         turn_into_tuple(post_call, pn_Call_max);
 497         /* First the Memory-Phi */
 498         n_mem_phi = 0;
 499         for (i = 0; i < arity; i++) {
 500                 ret = get_Block_cfgpred(end_bl, i);
 501                 if (is_Return(ret)) {
 502                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
 503                 }
 504                 /* memory output for some exceptions is directly connected to End */
 505                 if (is_Call(ret)) {
 506                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
 507                 } else if (is_fragile_op(ret)) {
 508                         /* We rely that all cfops have the memory output at the same position. */
 509                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
 510                 } else if (is_Raise(ret)) {
 511                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
 512                 }
 513         }
 514         phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
 515         set_Tuple_pred(call, pn_Call_M, phi);
 516         /* Conserve Phi-list for further inlinings -- but might be optimized */
 517         if (get_nodes_block(phi) == post_bl) {
 518                 set_irn_link(phi, get_irn_link(post_bl));
 519                 set_irn_link(post_bl, phi);
 520         }
 521         /* Now the real results */
 522         if (n_res > 0) {
 523                 ir_node *result_tuple;
 524                 for (j = 0; j < n_res; j++) {
 525                         ir_type *res_type = get_method_res_type(ctp, j);
 526                         ir_mode *res_mode = get_type_mode(res_type);
 527                         n_ret = 0;
 528                         for (i = 0; i < arity; i++) {
 529                                 ret = get_Block_cfgpred(end_bl, i);
 530                                 if (is_Return(ret)) {
 531                                         ir_node *res = get_Return_res(ret, j);
 532                                         if (get_irn_mode(res) != res_mode) {
 533                                                 ir_node *block = get_nodes_block(res);
 534                                                 res = new_r_Conv(block, res, res_mode);
 535                                         }
 536                                         cf_pred[n_ret] = res;
 537                                         n_ret++;
 538                                 }
 539                         }
 540                         if (n_ret > 0) {
 541                                 ir_mode *mode = get_irn_mode(cf_pred[0]);
 542                                 phi = new_r_Phi(post_bl, n_ret, cf_pred, mode);
 543                         } else {
 544                                 phi = new_r_Bad(irg);
 545                         }
 546                         res_pred[j] = phi;
 547                         /* Conserve Phi-list for further inlinings -- but might be optimized */
 548                         if (get_nodes_block(phi) == post_bl) {
 549                                 set_Phi_next(phi, get_Block_phis(post_bl));
 550                                 set_Block_phis(post_bl, phi);
 551                         }
 552                 }
 553                 result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
 554                 set_Tuple_pred(call, pn_Call_T_result, result_tuple);
 555         } else {
 556                 set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg));
 557         }
 558         /* handle the regular call */
 559         set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
 560
 561         /* For now, we cannot inline calls with value_base */
 562         set_Tuple_pred(call, pn_Call_P_value_res_base, new_r_Bad(irg));
 563
 564         /* Finally the exception control flow.
 565            We have two possible situations:
 566            First if the Call branches to an exception handler:
 567            We need to add a Phi node to
 568            collect the memory containing the exception objects.  Further we need
 569            to add another block to get a correct representation of this Phi.  To
 570            this block we add a Jmp that resolves into the X output of the Call
 571            when the Call is turned into a tuple.
 572            Second: There is no exception edge. Just add all inlined exception
 573            branches to the End node.
 574          */
 575         if (exc_handling == exc_handler) {
 576                 n_exc = 0;
 577                 for (i = 0; i < arity; i++) {
 578                         ir_node *ret, *irn;
 579                         ret = get_Block_cfgpred(end_bl, i);
 580                         irn = skip_Proj(ret);
 581                         if (is_fragile_op(irn) || is_Raise(irn)) {
 582                                 cf_pred[n_exc] = ret;
 583                                 ++n_exc;
 584                         }
 585                 }
 586                 if (n_exc > 0) {
 587                         if (n_exc == 1) {
 588                                 /* simple fix */
 589                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
 590                         } else {
 591                                 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
 592                                 set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
 593                         }
 594                 } else {
 595                         set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
 596                 }
 597         } else {
 598                 ir_node *main_end_bl;
 599                 int main_end_bl_arity;
 600                 ir_node **end_preds;
 601
 602                 /* assert(exc_handling == 1 || no exceptions. ) */
 603                 n_exc = 0;
 604                 for (i = 0; i < arity; i++) {
 605                         ir_node *ret = get_Block_cfgpred(end_bl, i);
 606                         ir_node *irn = skip_Proj(ret);
 607
 608                         if (is_fragile_op(irn) || is_Raise(irn)) {
 609                                 cf_pred[n_exc] = ret;
 610                                 n_exc++;
 611                         }
 612                 }
 613                 main_end_bl       = get_irg_end_block(irg);
 614                 main_end_bl_arity = get_irn_arity(main_end_bl);
 615                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
 616
 617                 for (i = 0; i < main_end_bl_arity; ++i)
 618                         end_preds[i] = get_irn_n(main_end_bl, i);
 619                 for (i = 0; i < n_exc; ++i)
 620                         end_preds[main_end_bl_arity + i] = cf_pred[i];
 621                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
 622                 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
 623                 free(end_preds);
 624         }
 625         free(res_pred);
 626         free(cf_pred);
 627
 628         /* --  Turn CSE back on. -- */
 629         set_optimize(rem_opt);
 630         current_ir_graph = rem;
 631
 632         return 1;
 633 }
 634
 635 /********************************************************************/
 636 /* Apply inlining to small methods.                                 */
 637 /********************************************************************/
 638
 639 static struct obstack  temp_obst;
 640
 641 /** Represents a possible inlinable call in a graph. */
 642 typedef struct call_entry {
 643         ir_node    *call;       /**< The Call node. */
 644         ir_graph   *callee;     /**< The callee IR-graph. */
 645         list_head  list;        /**< List head for linking the next one. */
 646         int        loop_depth;  /**< The loop depth of this call. */
 647         int        benefice;    /**< The calculated benefice of this call. */
 648         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
 649         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
 650 } call_entry;
 651
 652 /**
 653  * environment for inlining small irgs
 654  */
 655 typedef struct inline_env_t {
 656         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
 657         list_head      calls; /**< The call entry list. */
 658 } inline_env_t;
 659
 660 /**
 661  * Returns the irg called from a Call node. If the irg is not
 662  * known, NULL is returned.
 663  *
 664  * @param call  the call node
 665  */
 666 static ir_graph *get_call_called_irg(ir_node *call)
 667 {
 668         ir_node *addr;
 669
 670         addr = get_Call_ptr(call);
 671         if (is_Global(addr)) {
 672                 ir_entity *ent = get_Global_entity(addr);
 673                 /* we don't know which function gets finally bound to a weak symbol */
 674                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
 675                         return NULL;
 676
 677                 return get_entity_irg(ent);
 678         }
 679
 680         return NULL;
 681 }
 682
 683 /**
 684  * Walker: Collect all calls to known graphs inside a graph.
 685  */
 686 static void collect_calls(ir_node *call, void *env)
 687 {
 688         (void) env;
 689         if (is_Call(call)) {
 690                 ir_graph *called_irg = get_call_called_irg(call);
 691
 692                 if (called_irg != NULL) {
 693                         /* The Call node calls a locally defined method.  Remember to inline. */
 694                         inline_env_t *ienv  = (inline_env_t*)env;
 695                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
 696                         entry->call       = call;
 697                         entry->callee     = called_irg;
 698                         entry->loop_depth = 0;
 699                         entry->benefice   = 0;
 700                         entry->local_adr  = 0;
 701                         entry->all_const  = 0;
 702
 703                         list_add_tail(&entry->list, &ienv->calls);
 704                 }
 705         }
 706 }
 707
 708 /**
 709  * Inlines all small methods at call sites where the called address comes
 710  * from a Const node that references the entity representing the called
 711  * method.
 712  * The size argument is a rough measure for the code size of the method:
 713  * Methods where the obstack containing the firm graph is smaller than
 714  * size are inlined.
 715  */
 716 void inline_small_irgs(ir_graph *irg, int size)
 717 {
 718         ir_graph *rem = current_ir_graph;
 719         inline_env_t env;
 720         call_entry *entry;
 721
 722         current_ir_graph = irg;
 723         /* Handle graph state */
 724         assert(get_irg_phase_state(irg) != phase_building);
 725         free_callee_info(irg);
 726
 727         /* Find Call nodes to inline.
 728            (We can not inline during a walk of the graph, as inlining the same
 729            method several times changes the visited flag of the walked graph:
 730            after the first inlining visited of the callee equals visited of
 731            the caller.  With the next inlining both are increased.) */
 732         obstack_init(&env.obst);
 733         INIT_LIST_HEAD(&env.calls);
 734         irg_walk_graph(irg, NULL, collect_calls, &env);
 735
 736         if (! list_empty(&env.calls)) {
 737                 /* There are calls to inline */
 738                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 739                 collect_phiprojs(irg);
 740
 741                 list_for_each_entry(call_entry, entry, &env.calls, list) {
 742                         ir_graph            *callee = entry->callee;
 743                         irg_inline_property prop    = get_irg_inline_property(callee);
 744
 745                         if (prop == irg_inline_forbidden) {
 746                                 continue;
 747                         }
 748
 749                         if (prop >= irg_inline_forced ||
 750                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
 751                                 inline_method(entry->call, callee);
 752                         }
 753                 }
 754                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 755         }
 756         obstack_free(&env.obst, NULL);
 757         current_ir_graph = rem;
 758 }
 759
 760 typedef struct inline_small_irgs_pass_t {
 761         ir_graph_pass_t pass;
 762         int            size;
 763 } inline_small_irgs_pass_t;
 764
 765 /**
 766  * Wrapper to run inline_small_irgs() as a pass.
 767  */
 768 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
 769 {
 770         inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
 771
 772         inline_small_irgs(irg, pass->size);
 773         return 0;
 774 }
 775
 776 /* create a pass for inline_small_irgs() */
 777 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
 778 {
 779         inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
 780
 781         pass->size = size;
 782         return def_graph_pass_constructor(
 783                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
 784 }
 785
 786 /**
 787  * Environment for inlining irgs.
 788  */
 789 typedef struct {
 790         list_head calls;             /**< List of of all call nodes in this graph. */
 791         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
 792         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
 793         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
 794         unsigned  n_nodes_orig;      /**< for statistics */
 795         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
 796         unsigned  n_call_nodes_orig; /**< for statistics */
 797         unsigned  n_callers;         /**< Number of known graphs that call this graphs. */
 798         unsigned  n_callers_orig;    /**< for statistics */
 799         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
 800         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
 801 } inline_irg_env;
 802
 803 /**
 804  * Allocate a new environment for inlining.
 805  */
 806 static inline_irg_env *alloc_inline_irg_env(void)
 807 {
 808         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
 809         INIT_LIST_HEAD(&env->calls);
 810         env->local_weights     = NULL;
 811         env->n_nodes           = -2; /* do not count count Start, End */
 812         env->n_blocks          = -2; /* do not count count Start, End Block */
 813         env->n_nodes_orig      = -2; /* do not count Start, End */
 814         env->n_call_nodes      = 0;
 815         env->n_call_nodes_orig = 0;
 816         env->n_callers         = 0;
 817         env->n_callers_orig    = 0;
 818         env->got_inline        = 0;
 819         env->recursive         = 0;
 820         return env;
 821 }
 822
 823 typedef struct walker_env {
 824         inline_irg_env *x;     /**< the inline environment */
 825         char ignore_runtime;   /**< the ignore runtime flag */
 826         char ignore_callers;   /**< if set, do change callers data */
 827 } wenv_t;
 828
 829 /**
 830  * post-walker: collect all calls in the inline-environment
 831  * of a graph and sum some statistics.
 832  */
 833 static void collect_calls2(ir_node *call, void *ctx)
 834 {
 835         wenv_t         *env = (wenv_t*)ctx;
 836         inline_irg_env *x = env->x;
 837         unsigned        code = get_irn_opcode(call);
 838         ir_graph       *callee;
 839         call_entry     *entry;
 840
 841         /* count meaningful nodes in irg */
 842         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
 843                 if (code != iro_Block) {
 844                         ++x->n_nodes;
 845                         ++x->n_nodes_orig;
 846                 } else {
 847                         ++x->n_blocks;
 848                 }
 849         }
 850
 851         if (code != iro_Call) return;
 852
 853         /* check, if it's a runtime call */
 854         if (env->ignore_runtime) {
 855                 ir_node *symc = get_Call_ptr(call);
 856
 857                 if (is_Global(symc)) {
 858                         ir_entity *ent = get_Global_entity(symc);
 859
 860                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
 861                                 return;
 862                 }
 863         }
 864
 865         /* collect all call nodes */
 866         ++x->n_call_nodes;
 867         ++x->n_call_nodes_orig;
 868
 869         callee = get_call_called_irg(call);
 870         if (callee != NULL) {
 871                 if (! env->ignore_callers) {
 872                         inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
 873                         /* count all static callers */
 874                         ++callee_env->n_callers;
 875                         ++callee_env->n_callers_orig;
 876                 }
 877                 if (callee == current_ir_graph)
 878                         x->recursive = 1;
 879
 880                 /* link it in the list of possible inlinable entries */
 881                 entry = OALLOC(&temp_obst, call_entry);
 882                 entry->call       = call;
 883                 entry->callee     = callee;
 884                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
 885                 entry->benefice   = 0;
 886                 entry->local_adr  = 0;
 887                 entry->all_const  = 0;
 888
 889                 list_add_tail(&entry->list, &x->calls);
 890         }
 891 }
 892
 893 /**
 894  * Returns TRUE if the number of callers is 0 in the irg's environment,
 895  * hence this irg is a leave.
 896  */
 897 inline static int is_leave(ir_graph *irg)
 898 {
 899         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
 900         return env->n_call_nodes == 0;
 901 }
 902
 903 /**
 904  * Returns TRUE if the number of nodes in the callee is
 905  * smaller then size in the irg's environment.
 906  */
 907 inline static int is_smaller(ir_graph *callee, unsigned size)
 908 {
 909         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
 910         return env->n_nodes < size;
 911 }
 912
 913 /**
 914  * Duplicate a call entry.
 915  *
 916  * @param entry     the original entry to duplicate
 917  * @param new_call  the new call node
 918  * @param loop_depth_delta
 919  *                  delta value for the loop depth
 920  */
 921 static call_entry *duplicate_call_entry(const call_entry *entry,
 922                                         ir_node *new_call, int loop_depth_delta)
 923 {
 924         call_entry *nentry = OALLOC(&temp_obst, call_entry);
 925         nentry->call       = new_call;
 926         nentry->callee     = entry->callee;
 927         nentry->benefice   = entry->benefice;
 928         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
 929         nentry->local_adr  = entry->local_adr;
 930         nentry->all_const  = entry->all_const;
 931
 932         return nentry;
 933 }
 934
 935 /**
 936  * Append all call nodes of the source environment to the nodes of in the destination
 937  * environment.
 938  *
 939  * @param dst         destination environment
 940  * @param src         source environment
 941  * @param loop_depth  the loop depth of the call that is replaced by the src list
 942  */
 943 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
 944 {
 945         call_entry *entry, *nentry;
 946
 947         /* Note that the src list points to Call nodes in the inlined graph, but
 948            we need Call nodes in our graph. Luckily the inliner leaves this information
 949            in the link field. */
 950         list_for_each_entry(call_entry, entry, &src->calls, list) {
 951                 nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
 952                 list_add_tail(&nentry->list, &dst->calls);
 953         }
 954         dst->n_call_nodes += src->n_call_nodes;
 955         dst->n_nodes      += src->n_nodes;
 956 }
 957
 958 /*
 959  * Inlines small leave methods at call sites where the called address comes
 960  * from a Const node that references the entity representing the called
 961  * method.
 962  * The size argument is a rough measure for the code size of the method:
 963  * Methods where the obstack containing the firm graph is smaller than
 964  * size are inlined.
 965  */
 966 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
 967                             unsigned size, int ignore_runtime)
 968 {
 969         inline_irg_env   *env;
 970         ir_graph         *irg;
 971         size_t           i, n_irgs;
 972         ir_graph         *rem;
 973         int              did_inline;
 974         wenv_t           wenv;
 975         call_entry       *entry, *next;
 976         const call_entry *centry;
 977         pmap             *copied_graphs;
 978         pmap_entry       *pm_entry;
 979
 980         rem = current_ir_graph;
 981         obstack_init(&temp_obst);
 982
 983         /* a map for the copied graphs, used to inline recursive calls */
 984         copied_graphs = pmap_create();
 985
 986         /* extend all irgs by a temporary data structure for inlining. */
 987         n_irgs = get_irp_n_irgs();
 988         for (i = 0; i < n_irgs; ++i)
 989                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
 990
 991         /* Pre-compute information in temporary data structure. */
 992         wenv.ignore_runtime = ignore_runtime;
 993         wenv.ignore_callers = 0;
 994         for (i = 0; i < n_irgs; ++i) {
 995                 ir_graph *irg = get_irp_irg(i);
 996
 997                 assert(get_irg_phase_state(irg) != phase_building);
 998                 free_callee_info(irg);
 999
1000                 assure_cf_loop(irg);
1001                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1002                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1003         }
1004
1005         /* -- and now inline. -- */
1006
1007         /* Inline leaves recursively -- we might construct new leaves. */
1008         do {
1009                 did_inline = 0;
1010
1011                 for (i = 0; i < n_irgs; ++i) {
1012                         ir_node *call;
1013                         int phiproj_computed = 0;
1014
1015                         current_ir_graph = get_irp_irg(i);
1016                         env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1017
1018                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1019                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1020                                 ir_graph            *callee;
1021                                 irg_inline_property  prop;
1022
1023                                 if (env->n_nodes > maxsize)
1024                                         break;
1025
1026                                 call   = entry->call;
1027                                 callee = entry->callee;
1028
1029                                 prop = get_irg_inline_property(callee);
1030                                 if (prop == irg_inline_forbidden) {
1031                                         continue;
1032                                 }
1033
1034                                 if (is_leave(callee) && (
1035                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1036                                         if (!phiproj_computed) {
1037                                                 phiproj_computed = 1;
1038                                                 collect_phiprojs(current_ir_graph);
1039                                         }
1040                                         did_inline = inline_method(call, callee);
1041
1042                                         if (did_inline) {
1043                                                 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1044
1045                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1046                                                 phiproj_computed = 0;
1047
1048                                                 /* Do some statistics */
1049                                                 env->got_inline = 1;
1050                                                 --env->n_call_nodes;
1051                                                 env->n_nodes += callee_env->n_nodes;
1052                                                 --callee_env->n_callers;
1053
1054                                                 /* remove this call from the list */
1055                                                 list_del(&entry->list);
1056                                                 continue;
1057                                         }
1058                                 }
1059                         }
1060                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1061                 }
1062         } while (did_inline);
1063
1064         /* inline other small functions. */
1065         for (i = 0; i < n_irgs; ++i) {
1066                 ir_node *call;
1067                 int phiproj_computed = 0;
1068
1069                 current_ir_graph = get_irp_irg(i);
1070                 env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1071
1072                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1073
1074                 /* note that the list of possible calls is updated during the process */
1075                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1076                         irg_inline_property prop;
1077                         ir_graph            *callee;
1078                         pmap_entry          *e;
1079
1080                         call   = entry->call;
1081                         callee = entry->callee;
1082
1083                         prop = get_irg_inline_property(callee);
1084                         if (prop == irg_inline_forbidden) {
1085                                 continue;
1086                         }
1087
1088                         e = pmap_find(copied_graphs, callee);
1089                         if (e != NULL) {
1090                                 /*
1091                                  * Remap callee if we have a copy.
1092                                  * FIXME: Should we do this only for recursive Calls ?
1093                                  */
1094                                 callee = (ir_graph*)e->value;
1095                         }
1096
1097                         if (prop >= irg_inline_forced ||
1098                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1099                                 if (current_ir_graph == callee) {
1100                                         /*
1101                                          * Recursive call: we cannot directly inline because we cannot walk
1102                                          * the graph and change it. So we have to make a copy of the graph
1103                                          * first.
1104                                          */
1105
1106                                         inline_irg_env *callee_env;
1107                                         ir_graph       *copy;
1108
1109                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1110
1111                                         /*
1112                                          * No copy yet, create one.
1113                                          * Note that recursive methods are never leaves, so it is sufficient
1114                                          * to test this condition here.
1115                                          */
1116                                         copy = create_irg_copy(callee);
1117
1118                                         /* create_irg_copy() destroys the Proj links, recompute them */
1119                                         phiproj_computed = 0;
1120
1121                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1122
1123                                         /* allocate new environment */
1124                                         callee_env = alloc_inline_irg_env();
1125                                         set_irg_link(copy, callee_env);
1126
1127                                         assure_cf_loop(copy);
1128                                         wenv.x              = callee_env;
1129                                         wenv.ignore_callers = 1;
1130                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1131
1132                                         /*
1133                                          * Enter the entity of the original graph. This is needed
1134                                          * for inline_method(). However, note that ent->irg still points
1135                                          * to callee, NOT to copy.
1136                                          */
1137                                         set_irg_entity(copy, get_irg_entity(callee));
1138
1139                                         pmap_insert(copied_graphs, callee, copy);
1140                                         callee = copy;
1141
1142                                         /* we have only one caller: the original graph */
1143                                         callee_env->n_callers      = 1;
1144                                         callee_env->n_callers_orig = 1;
1145                                 }
1146                                 if (! phiproj_computed) {
1147                                         phiproj_computed = 1;
1148                                         collect_phiprojs(current_ir_graph);
1149                                 }
1150                                 did_inline = inline_method(call, callee);
1151                                 if (did_inline) {
1152                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1153
1154                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1155                                         phiproj_computed = 0;
1156
1157                                         /* callee was inline. Append it's call list. */
1158                                         env->got_inline = 1;
1159                                         --env->n_call_nodes;
1160                                         append_call_list(env, callee_env, entry->loop_depth);
1161                                         --callee_env->n_callers;
1162
1163                                         /* after we have inlined callee, all called methods inside callee
1164                                            are now called once more */
1165                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1166                                                 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1167                                                 ++penv->n_callers;
1168                                         }
1169
1170                                         /* remove this call from the list */
1171                                         list_del(&entry->list);
1172                                         continue;
1173                                 }
1174                         }
1175                 }
1176                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1177         }
1178
1179         for (i = 0; i < n_irgs; ++i) {
1180                 irg = get_irp_irg(i);
1181                 env = (inline_irg_env*)get_irg_link(irg);
1182
1183                 if (env->got_inline) {
1184                         optimize_graph_df(irg);
1185                         optimize_cf(irg);
1186                 }
1187                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1188                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1189                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1190                         env->n_callers_orig, env->n_callers,
1191                         get_entity_name(get_irg_entity(irg))));
1192                 }
1193         }
1194
1195         /* kill the copied graphs: we don't need them anymore */
1196         foreach_pmap(copied_graphs, pm_entry) {
1197                 ir_graph *copy = (ir_graph*)pm_entry->value;
1198
1199                 /* reset the entity, otherwise it will be deleted in the next step ... */
1200                 set_irg_entity(copy, NULL);
1201                 free_ir_graph(copy);
1202         }
1203         pmap_destroy(copied_graphs);
1204
1205         obstack_free(&temp_obst, NULL);
1206         current_ir_graph = rem;
1207 }
1208
1209 typedef struct inline_leave_functions_pass_t {
1210         ir_prog_pass_t pass;
1211         unsigned       maxsize;
1212         unsigned       leavesize;
1213         unsigned       size;
1214         int            ignore_runtime;
1215 } inline_leave_functions_pass_t;
1216
1217 /**
1218  * Wrapper to run inline_leave_functions() as a ir_prog pass.
1219  */
1220 static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
1221 {
1222         inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;
1223
1224         (void)irp;
1225         inline_leave_functions(
1226                 pass->maxsize, pass->leavesize,
1227                 pass->size, pass->ignore_runtime);
1228         return 0;
1229 }
1230
1231 /* create a pass for inline_leave_functions() */
1232 ir_prog_pass_t *inline_leave_functions_pass(
1233         const char *name, unsigned maxsize, unsigned leavesize,
1234         unsigned size, int ignore_runtime)
1235 {
1236         inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);
1237
1238         pass->maxsize        = maxsize;
1239         pass->leavesize      = leavesize;
1240         pass->size           = size;
1241         pass->ignore_runtime = ignore_runtime;
1242
1243         return def_prog_pass_constructor(
1244                 &pass->pass,
1245                 name ? name : "inline_leave_functions",
1246                 inline_leave_functions_wrapper);
1247 }
1248
1249 /**
1250  * Calculate the parameter weights for transmitting the address of a local variable.
1251  */
1252 static unsigned calc_method_local_weight(ir_node *arg)
1253 {
1254         int      i, j, k;
1255         unsigned v, weight = 0;
1256
1257         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1258                 ir_node *succ = get_irn_out(arg, i);
1259
1260                 switch (get_irn_opcode(succ)) {
1261                 case iro_Load:
1262                 case iro_Store:
1263                         /* Loads and Store can be removed */
1264                         weight += 3;
1265                         break;
1266                 case iro_Sel:
1267                         /* check if all args are constant */
1268                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1269                                 ir_node *idx = get_Sel_index(succ, j);
1270                                 if (! is_Const(idx))
1271                                         return 0;
1272                         }
1273                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1274                            some unsupported node. */
1275                         v = calc_method_local_weight(succ);
1276                         if (v == 0)
1277                                 return 0;
1278                         /* we can kill one Sel with constant indexes, this is cheap */
1279                         weight += v + 1;
1280                         break;
1281                 case iro_Id:
1282                         /* when looking backward we might find Id nodes */
1283                         weight += calc_method_local_weight(succ);
1284                         break;
1285                 case iro_Tuple:
1286                         /* unoptimized tuple */
1287                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1288                                 ir_node *pred = get_Tuple_pred(succ, j);
1289                                 if (pred == arg) {
1290                                         /* look for Proj(j) */
1291                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1292                                                 ir_node *succ_succ = get_irn_out(succ, k);
1293                                                 if (is_Proj(succ_succ)) {
1294                                                         if (get_Proj_proj(succ_succ) == j) {
1295                                                                 /* found */
1296                                                                 weight += calc_method_local_weight(succ_succ);
1297                                                         }
1298                                                 } else {
1299                                                         /* this should NOT happen */
1300                                                         return 0;
1301                                                 }
1302                                         }
1303                                 }
1304                         }
1305                         break;
1306                 default:
1307                         /* any other node: unsupported yet or bad. */
1308                         return 0;
1309                 }
1310         }
1311         return weight;
1312 }
1313
1314 /**
1315  * Calculate the parameter weights for transmitting the address of a local variable.
1316  */
1317 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1318 {
1319         ir_entity *ent = get_irg_entity(irg);
1320         ir_type  *mtp;
1321         size_t   nparams;
1322         int      i;
1323         long     proj_nr;
1324         ir_node  *irg_args, *arg;
1325
1326         mtp      = get_entity_type(ent);
1327         nparams  = get_method_n_params(mtp);
1328
1329         /* allocate a new array. currently used as 'analysed' flag */
1330         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1331
1332         /* If the method haven't parameters we have nothing to do. */
1333         if (nparams <= 0)
1334                 return;
1335
1336         assure_irg_outs(irg);
1337         irg_args = get_irg_args(irg);
1338         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1339                 arg     = get_irn_out(irg_args, i);
1340                 proj_nr = get_Proj_proj(arg);
1341                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1342         }
1343 }
1344
1345 /**
1346  * Calculate the benefice for transmitting an local variable address.
1347  * After inlining, the local variable might be transformed into a
1348  * SSA variable by scalar_replacement().
1349  */
1350 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1351 {
1352         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1353
1354         if (env->local_weights == NULL)
1355                 analyze_irg_local_weights(env, callee);
1356
1357         if (pos < ARR_LEN(env->local_weights))
1358                 return env->local_weights[pos];
1359         return 0;
1360 }
1361
1362 /**
1363  * Calculate a benefice value for inlining the given call.
1364  *
1365  * @param call       the call node we have to inspect
1366  * @param callee     the called graph
1367  */
1368 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1369 {
1370         ir_node   *call = entry->call;
1371         ir_entity *ent  = get_irg_entity(callee);
1372         ir_node   *frame_ptr;
1373         ir_type   *mtp;
1374         int       weight = 0;
1375         int       i, n_params, all_const;
1376         unsigned  cc, v;
1377         irg_inline_property prop;
1378
1379         inline_irg_env *callee_env;
1380
1381         prop = get_irg_inline_property(callee);
1382         if (prop == irg_inline_forbidden) {
1383                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1384                     call, callee));
1385                 return entry->benefice = INT_MIN;
1386         }
1387
1388         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1389                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1390                     call, callee));
1391                 return entry->benefice = INT_MIN;
1392         }
1393
1394         /* costs for every passed parameter */
1395         n_params = get_Call_n_params(call);
1396         mtp      = get_entity_type(ent);
1397         cc       = get_method_calling_convention(mtp);
1398         if (cc & cc_reg_param) {
1399                 /* register parameter, smaller costs for register parameters */
1400                 int max_regs = cc & ~cc_bits;
1401
1402                 if (max_regs < n_params)
1403                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1404                 else
1405                         weight += n_params * 2;
1406         } else {
1407                 /* parameters are passed an stack */
1408                 weight += 5 * n_params;
1409         }
1410
1411         /* constant parameters improve the benefice */
1412         frame_ptr = get_irg_frame(current_ir_graph);
1413         all_const = 1;
1414         for (i = 0; i < n_params; ++i) {
1415                 ir_node *param = get_Call_param(call, i);
1416
1417                 if (is_Const(param)) {
1418                         weight += get_method_param_weight(ent, i);
1419                 } else {
1420                         all_const = 0;
1421                         if (is_SymConst(param))
1422                                 weight += get_method_param_weight(ent, i);
1423                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1424                                 /*
1425                                  * An address of a local variable is transmitted. After
1426                                  * inlining, scalar_replacement might be able to remove the
1427                                  * local variable, so honor this.
1428                                  */
1429                                 v = get_method_local_adress_weight(callee, i);
1430                                 weight += v;
1431                                 if (v > 0)
1432                                         entry->local_adr = 1;
1433                         }
1434                 }
1435         }
1436         entry->all_const = all_const;
1437
1438         callee_env = (inline_irg_env*)get_irg_link(callee);
1439         if (callee_env->n_callers == 1 &&
1440             callee != current_ir_graph &&
1441             !entity_is_externally_visible(ent)) {
1442                 weight += 700;
1443         }
1444
1445         /* give a bonus for functions with one block */
1446         if (callee_env->n_blocks == 1)
1447                 weight = weight * 3 / 2;
1448
1449         /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
1450         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1451                 weight += 2000;
1452
1453         /* and finally for leaves: they do not increase the register pressure
1454            because of callee safe registers */
1455         if (callee_env->n_call_nodes == 0)
1456                 weight += 400;
1457
1458         /** it's important to inline inner loops first */
1459         if (entry->loop_depth > 30)
1460                 weight += 30 * 1024;
1461         else
1462                 weight += entry->loop_depth * 1024;
1463
1464         /*
1465          * All arguments constant is probably a good sign, give an extra bonus
1466          */
1467         if (all_const)
1468                 weight += 1024;
1469
1470         return entry->benefice = weight;
1471 }
1472
1473 typedef struct walk_env_t {
1474         ir_graph **irgs;
1475         size_t   last_irg;
1476 } walk_env_t;
1477
1478 /**
1479  * Callgraph walker, collect all visited graphs.
1480  */
1481 static void callgraph_walker(ir_graph *irg, void *data)
1482 {
1483         walk_env_t *env = (walk_env_t *)data;
1484         env->irgs[env->last_irg++] = irg;
1485 }
1486
1487 /**
1488  * Creates an inline order for all graphs.
1489  *
1490  * @return the list of graphs.
1491  */
1492 static ir_graph **create_irg_list(void)
1493 {
1494         ir_entity  **free_methods;
1495         size_t     n_irgs = get_irp_n_irgs();
1496         walk_env_t env;
1497
1498         cgana(&free_methods);
1499         xfree(free_methods);
1500
1501         compute_callgraph();
1502
1503         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1504         env.last_irg = 0;
1505
1506         callgraph_walk(NULL, callgraph_walker, &env);
1507         assert(n_irgs == env.last_irg);
1508
1509         return env.irgs;
1510 }
1511
1512 /**
1513  * Push a call onto the priority list if its benefice is big enough.
1514  *
1515  * @param pqueue   the priority queue of calls
1516  * @param call     the call entry
1517  * @param inlien_threshold
1518  *                 the threshold value
1519  */
1520 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1521                             int inline_threshold)
1522 {
1523         ir_graph            *callee  = call->callee;
1524         irg_inline_property prop     = get_irg_inline_property(callee);
1525         int                 benefice = calc_inline_benefice(call, callee);
1526
1527         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1528             get_irn_irg(call->call), call->call, callee, benefice));
1529
1530         if (prop < irg_inline_forced && benefice < inline_threshold) {
1531                 return;
1532         }
1533
1534         pqueue_put(pqueue, call, benefice);
1535 }
1536
1537 /**
1538  * Try to inline calls into a graph.
1539  *
1540  * @param irg      the graph into which we inline
1541  * @param maxsize  do NOT inline if the size of irg gets
1542  *                 bigger than this amount
1543  * @param inline_threshold
1544  *                 threshold value for inline decision
1545  * @param copied_graphs
1546  *                 map containing copied of recursive graphs
1547  */
1548 static void inline_into(ir_graph *irg, unsigned maxsize,
1549                         int inline_threshold, pmap *copied_graphs)
1550 {
1551         int            phiproj_computed = 0;
1552         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1553         call_entry     *curr_call;
1554         wenv_t         wenv;
1555         pqueue_t       *pqueue;
1556
1557         if (env->n_call_nodes == 0)
1558                 return;
1559
1560         if (env->n_nodes > maxsize) {
1561                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1562                 return;
1563         }
1564
1565         current_ir_graph = irg;
1566         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1567
1568         /* put irgs into the pqueue */
1569         pqueue = new_pqueue();
1570
1571         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1572                 assert(is_Call(curr_call->call));
1573                 maybe_push_call(pqueue, curr_call, inline_threshold);
1574         }
1575
1576         /* note that the list of possible calls is updated during the process */
1577         while (!pqueue_empty(pqueue)) {
1578                 int                 did_inline;
1579                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1580                 ir_graph            *callee     = curr_call->callee;
1581                 ir_node             *call_node  = curr_call->call;
1582                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1583                 irg_inline_property prop        = get_irg_inline_property(callee);
1584                 int                 loop_depth;
1585                 const call_entry    *centry;
1586                 pmap_entry          *e;
1587
1588                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1589                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1590                                                 env->n_nodes, callee, callee_env->n_nodes));
1591                         continue;
1592                 }
1593
1594                 e = pmap_find(copied_graphs, callee);
1595                 if (e != NULL) {
1596                         int benefice = curr_call->benefice;
1597                         /*
1598                          * Reduce the weight for recursive function IFF not all arguments are const.
1599                          * inlining recursive functions is rarely good.
1600                          */
1601                         if (!curr_call->all_const)
1602                                 benefice -= 2000;
1603                         if (benefice < inline_threshold)
1604                                 continue;
1605
1606                         /*
1607                          * Remap callee if we have a copy.
1608                          */
1609                         callee     = (ir_graph*)e->value;
1610                         callee_env = (inline_irg_env*)get_irg_link(callee);
1611                 }
1612
1613                 if (current_ir_graph == callee) {
1614                         /*
1615                          * Recursive call: we cannot directly inline because we cannot
1616                          * walk the graph and change it. So we have to make a copy of
1617                          * the graph first.
1618                          */
1619                         int benefice = curr_call->benefice;
1620                         ir_graph *copy;
1621
1622                         /*
1623                          * Reduce the weight for recursive function IFF not all arguments are const.
1624                          * inlining recursive functions is rarely good.
1625                          */
1626                         if (!curr_call->all_const)
1627                                 benefice -= 2000;
1628                         if (benefice < inline_threshold)
1629                                 continue;
1630
1631                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1632
1633                         /*
1634                          * No copy yet, create one.
1635                          * Note that recursive methods are never leaves, so it is
1636                          * sufficient to test this condition here.
1637                          */
1638                         copy = create_irg_copy(callee);
1639
1640                         /* create_irg_copy() destroys the Proj links, recompute them */
1641                         phiproj_computed = 0;
1642
1643                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1644
1645                         /* allocate a new environment */
1646                         callee_env = alloc_inline_irg_env();
1647                         set_irg_link(copy, callee_env);
1648
1649                         assure_cf_loop(copy);
1650                         wenv.x              = callee_env;
1651                         wenv.ignore_callers = 1;
1652                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1653
1654                         /*
1655                          * Enter the entity of the original graph. This is needed
1656                          * for inline_method(). However, note that ent->irg still points
1657                          * to callee, NOT to copy.
1658                          */
1659                         set_irg_entity(copy, get_irg_entity(callee));
1660
1661                         pmap_insert(copied_graphs, callee, copy);
1662                         callee = copy;
1663
1664                         /* we have only one caller: the original graph */
1665                         callee_env->n_callers      = 1;
1666                         callee_env->n_callers_orig = 1;
1667                 }
1668                 if (! phiproj_computed) {
1669                         phiproj_computed = 1;
1670                         collect_phiprojs(current_ir_graph);
1671                 }
1672                 did_inline = inline_method(call_node, callee);
1673                 if (!did_inline)
1674                         continue;
1675
1676                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1677                 phiproj_computed = 0;
1678
1679                 /* remove it from the caller list */
1680                 list_del(&curr_call->list);
1681
1682                 /* callee was inline. Append it's call list. */
1683                 env->got_inline = 1;
1684                 --env->n_call_nodes;
1685
1686                 /* we just generate a bunch of new calls */
1687                 loop_depth = curr_call->loop_depth;
1688                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1689                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1690                         ir_node        *new_call;
1691                         call_entry     *new_entry;
1692
1693                         /* after we have inlined callee, all called methods inside
1694                          * callee are now called once more */
1695                         ++penv->n_callers;
1696
1697                         /* Note that the src list points to Call nodes in the inlined graph,
1698                          * but we need Call nodes in our graph. Luckily the inliner leaves
1699                          * this information in the link field. */
1700                         new_call = (ir_node*)get_irn_link(centry->call);
1701                         assert(is_Call(new_call));
1702
1703                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1704                         list_add_tail(&new_entry->list, &env->calls);
1705                         maybe_push_call(pqueue, new_entry, inline_threshold);
1706                 }
1707
1708                 env->n_call_nodes += callee_env->n_call_nodes;
1709                 env->n_nodes += callee_env->n_nodes;
1710                 --callee_env->n_callers;
1711         }
1712         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1713         del_pqueue(pqueue);
1714 }
1715
1716 /*
1717  * Heuristic inliner. Calculates a benefice value for every call and inlines
1718  * those calls with a value higher than the threshold.
1719  */
1720 void inline_functions(unsigned maxsize, int inline_threshold,
1721                       opt_ptr after_inline_opt)
1722 {
1723         inline_irg_env   *env;
1724         size_t           i, n_irgs;
1725         ir_graph         *rem;
1726         wenv_t           wenv;
1727         pmap             *copied_graphs;
1728         pmap_entry       *pm_entry;
1729         ir_graph         **irgs;
1730
1731         rem = current_ir_graph;
1732         obstack_init(&temp_obst);
1733
1734         irgs = create_irg_list();
1735
1736         /* a map for the copied graphs, used to inline recursive calls */
1737         copied_graphs = pmap_create();
1738
1739         /* extend all irgs by a temporary data structure for inlining. */
1740         n_irgs = get_irp_n_irgs();
1741         for (i = 0; i < n_irgs; ++i)
1742                 set_irg_link(irgs[i], alloc_inline_irg_env());
1743
1744         /* Pre-compute information in temporary data structure. */
1745         wenv.ignore_runtime = 0;
1746         wenv.ignore_callers = 0;
1747         for (i = 0; i < n_irgs; ++i) {
1748                 ir_graph *irg = irgs[i];
1749
1750                 free_callee_info(irg);
1751
1752                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1753                 assure_cf_loop(irg);
1754                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1755         }
1756
1757         /* -- and now inline. -- */
1758         for (i = 0; i < n_irgs; ++i) {
1759                 ir_graph *irg = irgs[i];
1760
1761                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1762         }
1763
1764         for (i = 0; i < n_irgs; ++i) {
1765                 ir_graph *irg = irgs[i];
1766
1767                 env = (inline_irg_env*)get_irg_link(irg);
1768                 if (env->got_inline && after_inline_opt != NULL) {
1769                         /* this irg got calls inlined: optimize it */
1770                         after_inline_opt(irg);
1771                 }
1772                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1773                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1774                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1775                         env->n_callers_orig, env->n_callers,
1776                         get_entity_name(get_irg_entity(irg))));
1777                 }
1778         }
1779
1780         /* kill the copied graphs: we don't need them anymore */
1781         foreach_pmap(copied_graphs, pm_entry) {
1782                 ir_graph *copy = (ir_graph*)pm_entry->value;
1783
1784                 /* reset the entity, otherwise it will be deleted in the next step ... */
1785                 set_irg_entity(copy, NULL);
1786                 free_ir_graph(copy);
1787         }
1788         pmap_destroy(copied_graphs);
1789
1790         xfree(irgs);
1791
1792         obstack_free(&temp_obst, NULL);
1793         current_ir_graph = rem;
1794 }
1795
1796 typedef struct inline_functions_pass_t {
1797         ir_prog_pass_t pass;
1798         unsigned       maxsize;
1799         int            inline_threshold;
1800         opt_ptr        after_inline_opt;
1801 } inline_functions_pass_t;
1802
1803 /**
1804  * Wrapper to run inline_functions() as a ir_prog pass.
1805  */
1806 static int inline_functions_wrapper(ir_prog *irp, void *context)
1807 {
1808         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1809
1810         (void)irp;
1811         inline_functions(pass->maxsize, pass->inline_threshold,
1812                          pass->after_inline_opt);
1813         return 0;
1814 }
1815
1816 /* create a ir_prog pass for inline_functions */
1817 ir_prog_pass_t *inline_functions_pass(
1818           const char *name, unsigned maxsize, int inline_threshold,
1819           opt_ptr after_inline_opt)
1820 {
1821         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1822
1823         pass->maxsize          = maxsize;
1824         pass->inline_threshold = inline_threshold;
1825         pass->after_inline_opt = after_inline_opt;
1826
1827         return def_prog_pass_constructor(
1828                 &pass->pass, name ? name : "inline_functions",
1829                 inline_functions_wrapper);
1830 }
1831
1832 void firm_init_inline(void)
1833 {
1834         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1835 }