/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it's a local one.  env must be the ir_graph the procedure
 * is inlined into. The new entities must be in the link field of
 * the old entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);
        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into start block */
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}

/**
 * Walker: checks for nodes that prevent inlining: accesses to value-type
 * or parameter entities on the frame and stack allocations.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = (bool*)env;

        if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                        if (is_parameter_entity(ent)) {
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline an alloca call unless the user explicitly
                 * forced it, as this may change the program's memory overhead
                 * drastically when the function using alloca is called in a
                 * loop.  In the GCC present in SPEC2000, inlining into
                 * schedule_block caused it to require 2GB of RAM instead of
                 * 256MB.
                 *
                 * Sadly this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLA
                 * yet, so this disables inlining of functions using VLA (which
                 * are completely safe).
                 *
                 * Two solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function using
                 *   alloca()
                 */
                *allow_inline = false;
        }
}
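
/* Illustrative source-level example (an assumption about typical frontend
 * lowering, not taken from this file): a function such as
 *
 *     int f(int x) { g(&x); return x; }
 *
 * takes the address of its parameter, so a Sel on the frame references a
 * parameter entity and find_addr() clears *allow_inline. Likewise, any use
 * of alloca() (or a VLA) shows up as a stack Alloc and disables inlining. */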

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        size_t              n_params    = get_method_n_params(called_type);
        size_t              n_arguments = get_method_n_params(call_type);
        size_t              n_res       = get_method_n_ress(called_type);
        irg_inline_property prop        = get_irg_inline_property(called_graph);
        size_t              i;
        bool                res;

        if (prop == irg_inline_forbidden)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer parameters than needed. Currently
                 * we don't support this, although we could use Unknown then. */
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Compiling C has some unpleasant consequences: when argument and
         * parameter types differ, it is implementation dependent what happens.
         * We support inlining if the bit size of the types matches AND
         * the same arithmetic is used. */
        for (i = 0; i < n_params; ++i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = 0; i < n_res; ++i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound arguments */
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound arguments */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}
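
/* A minimal sketch of the "reinterpret the bits" rule above (illustrative,
 * assuming a 32-bit target): an `int` argument passed where an `unsigned`
 * parameter is declared still allows inlining, because both modes are
 * 32 bits wide and use the same arithmetic, so the bits can simply be
 * reinterpreted. A `float` argument against an `int` parameter of equal
 * size is rejected, because the mode arithmetic differs. */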

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another. Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        size_t   n_members  = get_class_n_members(from_frame);
        size_t   i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
        }
}

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        ir_node       *pre_call;
        ir_node       *post_call, *post_bl;
        ir_node       *in[pn_Start_max+1];
        ir_node       *end, *end_bl, *block;
        ir_node       **res_pred;
        ir_node       **cf_pred;
        ir_node       **args_in;
        ir_node       *ret, *phi;
        int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
        int           irn_arity, n_params;
        int           n_mem_phi;
        enum exc_mode exc_handling;
        ir_type       *mtp;
        ir_type       *ctp;
        ir_entity     *ent;
        ir_graph      *rem;
        ir_graph      *irg = get_irn_irg(call);

        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call. The graph must be copied with
         * create_irg_copy() before the call to inline_method(). */
        if (called_graph == irg)
                return 0;

        ent      = get_irg_entity(called_graph);
        mtp      = get_entity_type(ent);
        ctp      = get_Call_type(call);
        n_params = get_method_n_params(mtp);
        n_res    = get_method_n_ress(mtp);

        rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        set_irg_extblk_inconsistent(irg);
        set_irg_doms_inconsistent(irg);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handling:
           exc_handler:    There is a handler.
           exc_no_handler: Exception handling not represented in Firm. -- */
        {
                ir_node *Xproj = NULL;
                ir_node *proj;
                for (proj = (ir_node*)get_irn_link(call); proj != NULL;
                     proj = (ir_node*)get_irn_link(proj)) {
                        long proj_nr = get_Proj_proj(proj);
                        if (proj_nr == pn_Call_X_except) Xproj = proj;
                }
                exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
        }

        /* create the argument tuple */
        args_in = ALLOCAN(ir_node*, n_params);

        block = get_nodes_block(call);
        for (i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }

        /* Pre_call collects the memory, the frame and the arguments of the
         * call and later replaces the Start node of the called graph.
         * Post_call is the old Call node and collects the results of the
         * called graph. Both will end up being a tuple. */
        post_bl = get_nodes_block(call);
        /* XxMxPxPxPxT of Start + parameter of Call */
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
        post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* Link some nodes to nodes in the current graph, so the linked nodes
         * will be used instead of being copied.
         * This way the copier will use the created Tuple instead of copying
         * the Start node, and similarly the singleton nodes NoMem and Bad.
         * Note: this prohibits predecessors from being copied - only do it
         *       for nodes without predecessors */
        {
                ir_node *start_block;
                ir_node *start;
                ir_node *nomem;

                start_block = get_irg_start_block(called_graph);
                set_new_node(start_block, get_nodes_block(pre_call));
                mark_irn_visited(start_block);

                start = get_irg_start(called_graph);
                set_new_node(start, pre_call);
                mark_irn_visited(start);

                nomem = get_irg_no_mem(called_graph);
                set_new_node(nomem, get_irg_no_mem(irg));
                mark_irn_visited(nomem);
        }

        /* the entity link is used to link entities on the old stack frame to
         * the new stack frame */
        irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1: Block of Tuple.
           0: Phi of all Memories of Return statements.
           1: Jmp from new Block that merges the control flow from all exception
              predecessors of the old end block.
           2: Tuple of all arguments.
           3: Phi of Exception memories.
           In case the old Call directly branches to End on an exception we don't
           need the block merging all exceptions nor the Phi of the exception
           memories.
        */

        /* Precompute some values */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end    = get_new_node(get_irg_end(called_graph));
        arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res  = get_method_n_ress(get_Call_type(call));

        res_pred = XMALLOCN(ir_node*, n_res);
        cf_pred  = XMALLOCN(ir_node*, arity);

        /* preserve the keepalives of the inlined graph */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jmp nodes */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);

        /* build a Tuple for all results of the method.
         * add Phi node if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max+1);
        /* First the Memory-Phi */
        n_mem_phi = 0;
        for (i = 0; i < arity; i++) {
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End */
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely on all fragile cfops having the memory output at the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Conserve Phi-list for further inlinings -- but might be optimized */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                ir_node *result_tuple;
                for (j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
                                ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
                        } else {
                                phi = new_r_Bad(irg, res_mode);
                        }
                        res_pred[j] = phi;
                        /* Conserve Phi-list for further inlinings -- but might be optimized */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* Finally the exception control flow.
           We have two possible situations:
           First, the Call branches to an exception handler:
           We need to add a Phi node to collect the memory containing the
           exception objects.  Further we need to add another block to get a
           correct representation of this Phi.  To this block we add a Jmp that
           resolves into the X output of the Call when the Call is turned into
           a tuple.
           Second, there is no exception edge. Just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
                        ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                }
        } else {
                ir_node *main_end_bl;
                int main_end_bl_arity;
                ir_node **end_preds;

                /* exc_handling == exc_no_handler: add the inlined exception
                   flows to the end block of the caller */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
                end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn optimizations back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}
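
/* Usage sketch (illustrative; it mirrors what inline_small_irgs() below
 * does): the caller must provide Phi lists and Proj links, because
 * inline_method() relies on part_block() and turn_into_tuple():
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     if (inline_method(call, callee_irg)) {
 *             // the Call is now a Tuple; its Projs deliver the results
 *     }
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */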

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * Environment for inlining small irgs.
 */
typedef struct inline_env_t {
        struct obstack obst;  /**< An obstack where call_entries are allocated on. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_Global(addr)) {
                ir_entity *ent = get_Global_entity(addr);
                /* we don't know which function will finally be bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = (inline_env_t*)env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;

        current_ir_graph = irg;
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining, visited of the callee equals visited of
           the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph            *callee = entry->callee;
                        irg_inline_property prop    = get_irg_inline_property(callee);

                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        if (prop >= irg_inline_forced ||
                            _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}

typedef struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}
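
/* Hedged usage sketch: run the optimization directly over all graphs of the
 * program (the threshold of 50 is an arbitrary illustrative value, not a
 * tuned default):
 *
 *     size_t n = get_irp_n_irgs();
 *     for (size_t i = 0; i < n; ++i)
 *             inline_small_irgs(get_irp_irg(i), 50);
 *
 * inline_small_irgs_pass() wraps the same call so it can be scheduled by a
 * pass manager instead. */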

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start, End Block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update the callers' data */
} wenv_t;

/**
 * Post-walker: collect all calls in the inline environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = (wenv_t*)ctx;
        inline_irg_env *x = env->x;
        unsigned        code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check if it's a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_Global(symc)) {
                        ir_entity *ent = get_Global_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* link it into the list of possibly inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
 * hence this irg is a leaf function (it calls no other graphs).
 */
inline static int is_leave(ir_graph *irg)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
        return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the call list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *entry, *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The arguments are rough measures for the code size of a method: leaf
 * methods with fewer than leavesize nodes and other methods with fewer than
 * size nodes are inlined, as long as the caller does not grow beyond
 * maxsize nodes.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        size_t           i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        call_entry       *entry, *next;
        const call_entry *centry;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);

                assure_cf_loop(irg);
                wenv.x = (inline_irg_env*)get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */

        /* Inline leaf functions recursively -- we might construct new leaves. */
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        ir_node *call;
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                ir_graph            *callee;
                                irg_inline_property  prop;

                                if (env->n_nodes > maxsize)
                                        break;

                                call   = entry->call;
                                callee = entry->callee;

                                prop = get_irg_inline_property(callee);
                                if (prop == irg_inline_forbidden) {
                                        continue;
                                }

                                if (is_leave(callee) && (
                                    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                ir_node *call;
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        irg_inline_property prop;
                        ir_graph            *callee;
                        pmap_entry          *e;

                        call   = entry->call;
                        callee = entry->callee;

                        prop = get_irg_inline_property(callee);
                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls ?
                                 */
                                callee = (ir_graph*)e->value;
                        }

                        if (prop >= irg_inline_forced ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we
                                         * cannot walk the graph and change it. So we have to
                                         * make a copy of the graph first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is
                                         * sufficient to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate a new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still
                                         * points to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined. Append its call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside
                                           callee are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = (inline_irg_env*)get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}

typedef struct inline_leave_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leavesize;
        unsigned       size;
        int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
        inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

        (void)irp;
        inline_leave_functions(
                pass->maxsize, pass->leavesize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
        const char *name, unsigned maxsize, unsigned leavesize,
        unsigned size, int ignore_runtime)
{
        inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leavesize      = leavesize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leave_functions",
                inline_leave_functions_wrapper);
}
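
/* Hedged usage sketch; the thresholds are illustrative values, not tuned
 * defaults taken from this file:
 *
 *     // inline leaf functions of up to 400 nodes and other functions of
 *     // up to 50 nodes, but never grow a caller beyond 750 nodes
 *     inline_leave_functions(750, 400, 50, 0);
 */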

/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
        int      i, j, k;
        unsigned v, weight = 0;

        for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
                ir_node *succ = get_irn_out(arg, i);

                switch (get_irn_opcode(succ)) {
                case iro_Load:
                case iro_Store:
                        /* Loads and Stores can be removed */
                        weight += 3;
                        break;
                case iro_Sel:
                        /* check if all indexes are constant */
                        for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
                                ir_node *idx = get_Sel_index(succ, j);
                                if (! is_Const(idx))
                                        return 0;
                        }
                        /* Check users on this Sel. Note: if a 0 is returned here, there was
                           some unsupported node. */
                        v = calc_method_local_weight(succ);
                        if (v == 0)
                                return 0;
                        /* we can kill one Sel with constant indexes, this is cheap */
                        weight += v + 1;
                        break;
                case iro_Id:
                        /* when looking backward we might find Id nodes */
                        weight += calc_method_local_weight(succ);
                        break;
                case iro_Tuple:
                        /* unoptimized tuple */
                        for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
                                ir_node *pred = get_Tuple_pred(succ, j);
                                if (pred == arg) {
                                        /* look for Proj(j) */
                                        for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
                                                ir_node *succ_succ = get_irn_out(succ, k);
                                                if (is_Proj(succ_succ)) {
                                                        if (get_Proj_proj(succ_succ) == j) {
                                                                /* found */
                                                                weight += calc_method_local_weight(succ_succ);
                                                        }
                                                } else {
                                                        /* this should NOT happen */
                                                        return 0;
                                                }
                                        }
                                }
                        }
                        break;
                default:
                        /* any other node: unsupported yet or bad. */
                        return 0;
                }
        }
        return weight;
}

/**
 * Calculate the weights for all parameters of @p irg for transmitting
 * the address of a local variable.
 */
static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
{
        ir_entity *ent = get_irg_entity(irg);
        ir_type  *mtp;
        size_t   nparams;
        int      i;
        long     proj_nr;
        ir_node  *irg_args, *arg;

        mtp      = get_entity_type(ent);
        nparams  = get_method_n_params(mtp);

        /* allocate a new array; its presence also serves as the 'already analysed' flag */
        env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);

        /* If the method has no parameters, there is nothing to do. */
        if (nparams == 0)
                return;

        assure_irg_outs(irg);
        irg_args = get_irg_args(irg);
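        /* every Proj of the Args node represents one used parameter;
         * its Proj number equals the parameter position */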
        for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
                arg     = get_irn_out(irg_args, i);
                proj_nr = get_Proj_proj(arg);
                env->local_weights[proj_nr] = calc_method_local_weight(arg);
        }
}

/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);

        if (env->local_weights == NULL)
                analyze_irg_local_weights(env, callee);

        if (pos < ARR_LEN(env->local_weights))
                return env->local_weights[pos];
        return 0;
}

/**
 * Calculate a benefice value for inlining the given call.
 *
 * @param entry      the call entry containing the call to inspect
 * @param callee     the called graph
 */
static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
{
        ir_node   *call = entry->call;
        ir_entity *ent  = get_irg_entity(callee);
        ir_node   *frame_ptr;
        ir_type   *mtp;
        int       weight = 0;
        int       i, n_params, all_const;
        unsigned  cc, v;
        irg_inline_property prop;

        inline_irg_env *callee_env;

        prop = get_irg_inline_property(callee);
        if (prop == irg_inline_forbidden) {
                DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
                    call, callee));
                return entry->benefice = INT_MIN;
        }

        if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
                DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn function\n",
                    call, callee));
                return entry->benefice = INT_MIN;
        }

        /* costs for every passed parameter */
        n_params = get_Call_n_params(call);
        mtp      = get_entity_type(ent);
        cc       = get_method_calling_convention(mtp);
        if (cc & cc_reg_param) {
                /* register parameter, smaller costs for register parameters */
                int max_regs = cc & ~cc_bits;
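                /* the non-flag bits of the calling convention encode the
                 * number of parameters passed in registers */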

                if (max_regs < n_params)
                        weight += max_regs * 2 + (n_params - max_regs) * 5;
                else
                        weight += n_params * 2;
        } else {
                /* parameters are passed on the stack */
                weight += 5 * n_params;
        }

        /* constant parameters improve the benefice */
        frame_ptr = get_irg_frame(current_ir_graph);
        all_const = 1;
        for (i = 0; i < n_params; ++i) {
                ir_node *param = get_Call_param(call, i);

                if (is_Const(param)) {
                        weight += get_method_param_weight(ent, i);
                } else {
                        all_const = 0;
                        if (is_SymConst(param))
                                weight += get_method_param_weight(ent, i);
                        else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
                                /*
                                 * An address of a local variable is transmitted. After
                                 * inlining, scalar_replacement might be able to remove the
                                 * local variable, so honor this.
                                 */
                                v = get_method_local_adress_weight(callee, i);
                                weight += v;
                                if (v > 0)
                                        entry->local_adr = 1;
                        }
                }
        }
        entry->all_const = all_const;

        callee_env = (inline_irg_env*)get_irg_link(callee);
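        /* a local function with exactly one caller becomes dead once its only
         * call is inlined, so give a big bonus */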
        if (callee_env->n_callers == 1 &&
            callee != current_ir_graph &&
            !entity_is_externally_visible(ent)) {
                weight += 700;
        }

        /* give a bonus for functions with one block */
        if (callee_env->n_blocks == 1)
                weight = weight * 3 / 2;

        /* and one for small non-recursive functions: we want them to be inlined in almost every case */
        if (callee_env->n_nodes < 30 && !callee_env->recursive)
                weight += 2000;

        /* and finally for leaves: they do not increase the register pressure
           because of callee-save registers */
        if (callee_env->n_call_nodes == 0)
                weight += 400;

        /* it's important to inline inner loops first */
        if (entry->loop_depth > 30)
                weight += 30 * 1024;
        else
                weight += entry->loop_depth * 1024;

        /*
         * All arguments constant is probably a good sign, give an extra bonus
         */
        if (all_const)
                weight += 1024;

        return entry->benefice = weight;
}

typedef struct walk_env_t {
        ir_graph **irgs;
        size_t   last_irg;
} walk_env_t;

/**
 * Callgraph walker, collects all visited graphs.
 */
static void callgraph_walker(ir_graph *irg, void *data)
{
        walk_env_t *env = (walk_env_t *)data;
        env->irgs[env->last_irg++] = irg;
}

/**
 * Creates an inline order for all graphs.
 *
 * @return the list of graphs.
 */
static ir_graph **create_irg_list(void)
{
        ir_entity  **free_methods;
        size_t     n_irgs = get_irp_n_irgs();
        walk_env_t env;

        cgana(&free_methods);
        xfree(free_methods);

        compute_callgraph();

        env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
        env.last_irg = 0;

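        /* the post-walker visits callees before their callers (up to cycles
         * in the callgraph), so the list yields a bottom-up inline order */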
        callgraph_walk(NULL, callgraph_walker, &env);
        assert(n_irgs == env.last_irg);

        return env.irgs;
}

/**
 * Push a call onto the priority queue if its benefice is big enough.
 *
 * @param pqueue   the priority queue of calls
 * @param call     the call entry
 * @param inline_threshold
 *                 the threshold value
 */
static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
                            int inline_threshold)
{
        ir_graph            *callee  = call->callee;
        irg_inline_property prop     = get_irg_inline_property(callee);
        int                 benefice = calc_inline_benefice(call, callee);

        DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
            get_irn_irg(call->call), call->call, callee, benefice));

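        /* calls to forced-inline graphs are queued regardless of their
         * benefice; everything else must reach the threshold */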
        if (prop < irg_inline_forced && benefice < inline_threshold) {
                return;
        }

        pqueue_put(pqueue, call, benefice);
}

/**
 * Try to inline calls into a graph.
 *
 * @param irg      the graph into which we inline
 * @param maxsize  do NOT inline if the size of irg gets
 *                 bigger than this amount
 * @param inline_threshold
 *                 threshold value for inline decision
 * @param copied_graphs
 *                 map containing copies of recursive graphs
 */
static void inline_into(ir_graph *irg, unsigned maxsize,
                        int inline_threshold, pmap *copied_graphs)
{
        int            phiproj_computed = 0;
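        /* set while the Phi lists and Proj links built by collect_phiprojs()
         * are up to date; copying a graph or inlining a call invalidates
         * them, so they are recomputed lazily below */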
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        call_entry     *curr_call;
        wenv_t         wenv;
        pqueue_t       *pqueue;

        if (env->n_call_nodes == 0)
                return;

        if (env->n_nodes > maxsize) {
                DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
                return;
        }

        current_ir_graph = irg;
        ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

        /* put the calls into the priority queue */
        pqueue = new_pqueue();

        list_for_each_entry(call_entry, curr_call, &env->calls, list) {
                assert(is_Call(curr_call->call));
                maybe_push_call(pqueue, curr_call, inline_threshold);
        }

        /* note that the list of possible calls is updated during the process */
        while (!pqueue_empty(pqueue)) {
                int                 did_inline;
                call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
                ir_graph            *callee     = curr_call->callee;
                ir_node             *call_node  = curr_call->call;
                inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
                irg_inline_property prop        = get_irg_inline_property(callee);
                int                 loop_depth;
                const call_entry    *centry;
                pmap_entry          *e;

                if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
                        DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
                                                env->n_nodes, callee, callee_env->n_nodes));
                        continue;
                }

                e = pmap_find(copied_graphs, callee);
                if (e != NULL) {
                        int benefice = curr_call->benefice;
                        /*
                         * Reduce the benefice for recursive functions if not all
                         * arguments are constant: inlining them is rarely profitable.
                         */
                        if (!curr_call->all_const)
                                benefice -= 2000;
                        if (benefice < inline_threshold)
                                continue;

                        /*
                         * Remap callee if we have a copy.
                         */
                        callee     = (ir_graph*)e->value;
                        callee_env = (inline_irg_env*)get_irg_link(callee);
                }

                if (current_ir_graph == callee) {
                        /*
                         * Recursive call: we cannot directly inline because we cannot
                         * walk the graph and change it. So we have to make a copy of
                         * the graph first.
                         */
                        int benefice = curr_call->benefice;
                        ir_graph *copy;

                        /*
                         * Reduce the benefice for recursive functions if not all
                         * arguments are constant: inlining them is rarely profitable.
                         */
                        if (!curr_call->all_const)
                                benefice -= 2000;
                        if (benefice < inline_threshold)
                                continue;

                        ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                        /*
                         * No copy yet, create one.
                         * Note that recursive methods are never leaves, so it is
                         * sufficient to test this condition here.
                         */
                        copy = create_irg_copy(callee);

                        /* create_irg_copy() destroys the Proj links, recompute them */
                        phiproj_computed = 0;

                        ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                        /* allocate a new environment */
                        callee_env = alloc_inline_irg_env();
                        set_irg_link(copy, callee_env);

                        assure_cf_loop(copy);
                        memset(&wenv, 0, sizeof(wenv));
                        wenv.x              = callee_env;
                        wenv.ignore_callers = 1;
                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                        /*
                         * Enter the entity of the original graph. This is needed
                         * for inline_method(). However, note that ent->irg still points
                         * to callee, NOT to copy.
                         */
                        set_irg_entity(copy, get_irg_entity(callee));

                        pmap_insert(copied_graphs, callee, copy);
                        callee = copy;

                        /* we have only one caller: the original graph */
                        callee_env->n_callers      = 1;
                        callee_env->n_callers_orig = 1;
                }
                if (! phiproj_computed) {
                        phiproj_computed = 1;
                        collect_phiprojs(current_ir_graph);
                }
                did_inline = inline_method(call_node, callee);
                if (!did_inline)
                        continue;

                /* call was inlined, Phi/Projs for current graph must be recomputed */
                phiproj_computed = 0;

                /* remove it from the caller list */
                list_del(&curr_call->list);

                /* callee was inlined; append its call list */
                env->got_inline = 1;
                --env->n_call_nodes;

                /* we just generated a bunch of new calls */
                loop_depth = curr_call->loop_depth;
                list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                        inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                        ir_node        *new_call;
                        call_entry     *new_entry;

                        /* after we have inlined callee, all called methods inside
                         * callee are now called once more */
                        ++penv->n_callers;

                        /* Note that the src list points to Call nodes in the inlined graph,
                         * but we need Call nodes in our graph. Luckily the inliner leaves
                         * this information in the link field. */
                        new_call = (ir_node*)get_irn_link(centry->call);
                        if (get_irn_irg(new_call) != irg) {
                                /* centry->call has not been copied, which means it is dead.
                                 * This might happen during inlining, if a const function,
                                 * which cannot be inlined, is only used as an unused argument
                                 * of another function, which is inlined. */
                                continue;
                        }
                        assert(is_Call(new_call));

                        new_entry = duplicate_call_entry(centry, new_call, loop_depth);
                        list_add_tail(&new_entry->list, &env->calls);
                        maybe_push_call(pqueue, new_entry, inline_threshold);
                }

                env->n_call_nodes += callee_env->n_call_nodes;
                env->n_nodes += callee_env->n_nodes;
                --callee_env->n_callers;
        }
        ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        del_pqueue(pqueue);
}

/*
 * Heuristic inliner. Calculates a benefice value for every call and inlines
 * those calls with a value higher than the threshold.
 */
void inline_functions(unsigned maxsize, int inline_threshold,
                      opt_ptr after_inline_opt)
{
        inline_irg_env   *env;
        size_t           i, n_irgs;
        ir_graph         *rem;
        wenv_t           wenv;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;
        ir_graph         **irgs;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        irgs = create_irg_list();

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(irgs[i], alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = 0;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = irgs[i];

                free_callee_info(irg);

                wenv.x = (inline_irg_env*)get_irg_link(irg);
                assure_cf_loop(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = irgs[i];

                inline_into(irg, maxsize, inline_threshold, copied_graphs);
        }

        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = irgs[i];

                env = (inline_irg_env*)get_irg_link(irg);
                if (env->got_inline && after_inline_opt != NULL) {
                        /* this irg got calls inlined: optimize it */
                        after_inline_opt(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        xfree(irgs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}

typedef struct inline_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        int            inline_threshold;
        opt_ptr        after_inline_opt;
} inline_functions_pass_t;

/**
 * Wrapper to run inline_functions() as an ir_prog pass.
 */
static int inline_functions_wrapper(ir_prog *irp, void *context)
{
        inline_functions_pass_t *pass = (inline_functions_pass_t*)context;

        (void)irp;
        inline_functions(pass->maxsize, pass->inline_threshold,
                         pass->after_inline_opt);
        return 0;
}

/* create an ir_prog pass for inline_functions */
ir_prog_pass_t *inline_functions_pass(
          const char *name, unsigned maxsize, int inline_threshold,
          opt_ptr after_inline_opt)
{
        inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);

        pass->maxsize          = maxsize;
        pass->inline_threshold = inline_threshold;
        pass->after_inline_opt = after_inline_opt;

        return def_prog_pass_constructor(
                &pass->pass, name ? name : "inline_functions",
                inline_functions_wrapper);
}
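
/*
 * Usage sketch (illustrative values only, not a recommendation): the
 * heuristic inliner can also be run directly instead of via the pass:
 *
 *     inline_functions(750, 0, NULL);
 *
 * This limits graph growth to 750 nodes, inlines every call with a
 * non-negative benefice, and runs no optimization afterwards.
 */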

void firm_init_inline(void)
{
        FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
}