nsz Git - libfirm/blob - include/libfirm/iroptimize.h

   1 /*
   2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief   Available Optimisations of libFirm.
  23  * @version $Id$
  24  */
  25 #ifndef FIRM_IROPTIMIZE_H
  26 #define FIRM_IROPTIMIZE_H
  27
  28 #include "firm_types.h"
  29
  30 /**
  31  * Control flow optimization.
  32  *
  33  * Removes empty blocks doing if simplifications and loop simplifications.
  34  * A block is empty if it contains only a Jmp node and Phi nodes.
  35  * Merges single entry single exit blocks with their predecessor
  36  * and propagates dead control flow by calling equivalent_node().
  37  * Independent of compiler flag it removes Tuples from cf edges,
  38  * Bad predecessors from Blocks and Phis, and unnecessary predecessors of End.
  39  *
  40  * @bug So far destroys backedge information.
  41  * @bug Chokes on Id nodes if called in a certain order with other
  42  *      optimizations.  Call local_optimize_graph() before to remove
  43  *      Ids.
  44  */
  45 void optimize_cf(ir_graph *irg);
  46
  47 /**
  48  * Perform path-sensitive jump threading on the given graph.
  49  *
  50  * @param irg  the graph
  51  */
  52 void opt_jumpthreading(ir_graph* irg);
  53
  54 /**
  55  * Try to simplify boolean expression in the given ir graph.
  56  * eg. x < 5 && x < 6 becomes x < 5
  57  *
  58  * @param irg  the graph
  59  */
  60 void opt_bool(ir_graph *irg);
  61
  62 /**
  63  * Try to reduce the number of conv nodes in the given ir graph.
  64  *
  65  * @param irg  the graph
  66  *
  67  * @return non-zero if the optimization could be applied, 0 else
  68  */
  69 int conv_opt(ir_graph *irg);
  70
  71 /**
  72  * Do the scalar replacement optimization.
  73  * Make a date flow analyze and split the
  74  * data flow edges.
  75  *
  76  * @param irg  the graph which should be optimized
  77  */
  78 void data_flow_scalar_replacement_opt(ir_graph *irg);
  79
  80 /**
  81  * A callback that checks whether a entity is an allocation
  82  * routine.
  83  */
  84 typedef int (*check_alloc_entity_func)(ir_entity *ent);
  85
  86 /**
  87  * Do simple and fast escape analysis for one graph.
  88  *
  89  * @param irg       the graph
  90  * @param callback  a callback function to check whether a
  91  *                  given entity is a allocation call
  92  */
  93 void escape_enalysis_irg(ir_graph *irg, check_alloc_entity_func callback);
  94
  95 /**
  96  * Do simple and fast escape analysis for all graphs.
  97  *
  98  * This optimization implements a simple and fast but inexact
  99  * escape analysis. Some addresses might be marked as 'escaped' even
 100  * if they are not.
 101  * The advantage is a low memory footprint and fast speed.
 102  *
 103  * @param run_scalar_replace  if this flag in non-zero, scalar
 104  *                            replacement optimization is run on graphs with removed
 105  *                            allocation
 106  * @param callback            a callback function to check whether a
 107  *                            given entity is a allocation call
 108  *
 109  * This optimization removes allocation which are not used (rare) and replace
 110  * allocation that can be proved dead at the end of the graph which stack variables.
 111  *
 112  * The creation of stack variable allows scalar replacement to be run only
 113  * on those graphs that have been changed.
 114  *
 115  * This is most effective on Java where no other stack variables exists.
 116  */
 117 void escape_analysis(int run_scalar_replace, check_alloc_entity_func callback);
 118
 119 /**
 120  * Optimize function calls by handling const functions.
 121  *
 122  * This optimization first detects all "const functions", i.e.,
 123  * IR graphs that neither read nor write memory (and hence did
 124  * not create exceptions, as these use memory in Firm).
 125  *
 126  * The result of calls to such functions depends only on its
 127  * arguments, hence those calls are no more pinned.
 128  *
 129  * This is a rather strong criteria, so do not expect that a
 130  * lot of functions will be found. Moreover, all of them might
 131  * already be inlined if inlining is activated.
 132  * Anyway, it might be good for handling builtin's or pseudo-graphs,
 133  * even if the later read/write memory (but we know how).
 134  *
 135  * This optimizations read the irg_const_function property of
 136  * entities and and sets the irg_const_function property of
 137  * graphs.
 138  *
 139  * If callee information is valid, we also optimize polymorphic Calls.
 140  *
 141  * @param force_run  if non-zero, an optimization run is started even
 142  *                   if no const function graph was detected.
 143  *                   Else calls are only optimized if at least one
 144  *                   const function graph was detected.
 145  * @param callback   a callback function to check whether a
 146  *                   given entity is a allocation call
 147  *
 148  * If the frontend created external entities with the irg_const_function
 149  * property set, the force_run parameter should be set, else
 150  * should be unset.
 151  *
 152  * @note This optimization destroys the link fields of nodes.
 153  */
 154 void optimize_funccalls(int force_run, check_alloc_entity_func callback);
 155
 156 /**
 157  * Does Partial Redundancy Elimination combined with
 158  * Global Value Numbering.
 159  * Can be used to replace place_code() completely.
 160  *
 161  * Based on VanDrunen and Hosking 2004.
 162  *
 163  * @param irg  the graph
 164  */
 165 void do_gvn_pre(ir_graph *irg);
 166
 167 /**
 168  * This function is called to evaluate, if a mux can build
 169  * of the current architecture.
 170  * If it returns non-zero, a mux is created, else the code
 171  * is not modified.
 172  * @param sel        A selector of a Cond.
 173  * @param phi_list   List of Phi nodes about to be converted (linked via get_Phi_next() field)
 174  * @param i          First data predecessor involved in if conversion
 175  * @param j          Second data predecessor involved in if conversion
 176  */
 177 typedef int (*arch_allow_ifconv_func)(ir_node *sel, ir_node* phi_list, int i, int j);
 178
 179 /**
 180  * The parameters structure.
 181  */
 182 struct ir_settings_if_conv_t {
 183         int                 max_depth;       /**< The maximum depth up to which expressions
 184                                                are examined when it has to be decided if they
 185                                                can be placed into another block. */
 186         arch_allow_ifconv_func allow_ifconv; /**< Evaluator function, if not set all possible Psi
 187                                                nodes will be created. */
 188 };
 189
 190 /**
 191  * Perform If conversion on a graph.
 192  *
 193  * @param irg The graph.
 194  * @param params The parameters for the if conversion.
 195  *
 196  * Cannot handle blocks with Bad control predecessors, so call it after control
 197  * flow optimization.
 198  */
 199 void opt_if_conv(ir_graph *irg, const ir_settings_if_conv_t *params);
 200
 201 void opt_sync(ir_graph *irg);
 202
 203 /*
 204  * Check if we can replace the load by a given const from
 205  * the const code irg.
 206  *
 207  * @param load   the load to replace
 208  * @param c      the constant
 209  *
 210  * @return in the modes match or can be transformed using a reinterpret cast
 211  *         returns a copy of the constant (possibly Conv'ed) on the
 212  *         current_ir_graph
 213  */
 214 ir_node *can_replace_load_by_const(const ir_node *load, ir_node *c);
 215
 216 /**
 217  * Load/Store optimization.
 218  *
 219  * Removes redundant non-volatile Loads and Stores.
 220  * May introduce Bad nodes if exceptional control flow
 221  * is removed. The following cases are optimized:
 222  *
 223  * Load without result: A Load which has only a memory use
 224  *   is removed.
 225  *
 226  * Load after Store: A Load after a Store is removed, if
 227  *   the Load doesn't have an exception handler OR is in
 228  *   the same block as the Store.
 229  *
 230  * Load after Load: A Load after a Load is removed, if the
 231  *   Load doesn't have an exception handler OR is in the
 232  *   same block as the previous Load.
 233  *
 234  * Store before Store: A Store immediately before another
 235  *   Store in the same block is removed, if the Store doesn't
 236  *   have an exception handler.
 237  *
 238  * Store after Load: A Store after a Load is removed, if the
 239  *   Store doesn't have an exception handler.
 240  *
 241  * @return non-zero if the optimization could be applied, 0 else
 242  */
 243 int optimize_load_store(ir_graph *irg);
 244
 245 /**
 246  * New experimental alternative to optimize_load_store.
 247  * Based on a dataflow analysis, so load/stores are moved out of loops
 248  * where possible
 249  */
 250 int opt_ldst(ir_graph *irg);
 251
 252 /**
 253  * Do Loop unrolling in the given graph.
 254  */
 255 void optimize_loop_unrolling(ir_graph *irg);
 256
 257 /**
 258  * Optimize the frame type of an irg by removing
 259  * never touched entities.
 260  *
 261  * @param irg  The graph whose frame type will be optimized
 262  *
 263  * This function did not change the graph, only it's frame type.
 264  * The layout state of the frame type will be set to layout_undefined
 265  * if entities were removed.
 266  */
 267 void opt_frame_irg(ir_graph *irg);
 268
 269 /** Possible flags for the Operator Scalar Replacement. */
 270 typedef enum osr_flags {
 271         osr_flag_none               = 0,  /**< no additional flags */
 272         osr_flag_lftr_with_ov_check = 1,  /**< do linear function test replacement
 273                                                only if no overflow can occur. */
 274         osr_flag_ignore_x86_shift   = 2,  /**< ignore Multiplications by 2, 4, 8 */
 275         osr_flag_keep_reg_pressure  = 4   /**< do NOT increase register pressure by introducing new
 276                                                induction variables. */
 277 } osr_flags;
 278
 279 /* FirmJNI cannot handle identical enum values... */
 280
 281 /** default setting */
 282 #define osr_flag_default osr_flag_lftr_with_ov_check
 283
 284 /**
 285  * Do the Operator Scalar Replacement optimization and linear
 286  * function test replacement for loop control.
 287  * Can be switched off using the set_opt_strength_red() flag.
 288  * In that case, only remove_phi_cycles() is executed.
 289  *
 290  * @param irg    the graph which should be optimized
 291  * @param flags  set of osr_flags
 292  *
 293  * The linear function replacement test is controlled by the flags.
 294  * If the osr_flag_lftr_with_ov_check is set, the replacement is only
 295  * done if do overflow can occur.
 296  * Otherwise it is ALWAYS done which might be insecure.
 297  *
 298  * For instance:
 299  *
 300  * for (i = 0; i < 100; ++i)
 301  *
 302  * might be replaced by
 303  *
 304  * for (i = 0; i < 400; i += 4)
 305  *
 306  * But
 307  *
 308  * for (i = 0; i < 0x7FFFFFFF; ++i)
 309  *
 310  * will not be replaced by
 311  *
 312  * for (i = 0; i < 0xFFFFFFFC; i += 4)
 313  *
 314  * because of overflow.
 315  *
 316  * More bad cases:
 317  *
 318  * for (i = 0; i <= 0xF; ++i)
 319  *
 320  * will NOT be transformed into
 321  *
 322  * for (i = 0xFFFFFFF0; i <= 0xFFFFFFFF; ++i)
 323  *
 324  * although here is no direct overflow. The OV occurs when the ++i
 325  * is executed (and would created an endless loop here!).
 326  *
 327  * For the same reason, a loop
 328  *
 329  * for (i = 0; i <= 9; i += x)
 330  *
 331  * will NOT be transformed because we cannot estimate whether an overflow
 332  * might happen adding x.
 333  *
 334  * Note that i < a + 400 is also not possible with the current implementation
 335  * although this might be allowed by other compilers...
 336  *
 337  * Note further that tests for equality can be handled some simpler (but are not
 338  * implemented yet).
 339  *
 340  * This algorithm destroys the link field of nodes.
 341  */
 342 void opt_osr(ir_graph *irg, unsigned flags);
 343
 344 /**
 345  * Removes useless Phi cycles, i.e cycles of Phi nodes with only one
 346  * non-Phi node.
 347  * This is automatically done in opt_osr(), so there is no need to call it
 348  * additionally.
 349  *
 350  * @param irg    the graph which should be optimized
 351  *
 352  * This algorithm destroys the link field of nodes.
 353  */
 354 void remove_phi_cycles(ir_graph *irg);
 355
 356 /** A default threshold. */
 357 #define DEFAULT_CLONE_THRESHOLD 300
 358
 359 /**
 360  * Do procedure cloning. Evaluate a heuristic weight for every
 361  * Call(..., Const, ...). If the weight is bigger than threshold,
 362  * clone the entity and fix the calls.
 363  *
 364  * @param threshold   the threshold for cloning
 365  *
 366  * The threshold is an estimation of how many instructions are saved
 367  * when executing a cloned method. If threshold is 0.0, every possible
 368  * call is cloned.
 369  */
 370 void proc_cloning(float threshold);
 371
 372 /**
 373  * Reassociation.
 374  *
 375  * Applies Reassociation rules to integer expressions.
 376  * Beware: Works only if integer overflow might be ignored, as for C, Java
 377  * and for address expression.
 378  * Works only if Constant folding is activated.
 379  *
 380  * Uses loop information to detect loop-invariant (ie contant
 381  * inside the loop) values.
 382  *
 383  * See Muchnik 12.3.1 Algebraic Simplification and Reassociation of
 384  * Addressing Expressions.
 385  *
 386  * @return non-zero if the optimization could be applied, 0 else
 387  */
 388 int optimize_reassociation(ir_graph *irg);
 389
 390 /**
 391  * Normalize the Returns of a graph by creating a new End block
 392  * with One Return(Phi).
 393  * This is the preferred input for the if-conversion.
 394  *
 395  * In pseudocode, it means:
 396  *
 397  * if (a)
 398  *   return b;
 399  * else
 400  *   return c;
 401  *
 402  * is transformed into
 403  *
 404  * if (a)
 405  *   res = b;
 406  * else
 407  *   res = c;
 408  * return res;
 409  */
 410 void normalize_one_return(ir_graph *irg);
 411
 412 /**
 413  * Normalize the Returns of a graph by moving
 414  * the Returns upwards as much as possible.
 415  * This might be preferred for code generation.
 416  *
 417  * In pseudocode, it means:
 418  *
 419  * if (a)
 420  *   res = b;
 421  * else
 422  *   res = c;
 423  * return res;
 424  *
 425  * is transformed into
 426  *
 427  * if (a)
 428  *   return b;
 429  * else
 430  *   return c;
 431  */
 432 void normalize_n_returns(ir_graph *irg);
 433
 434 /**
 435  * Do the scalar replacement optimization.
 436  * Replace local compound entities (like structures and arrays)
 437  * with atomic values if possible. Does not handle classes yet.
 438  *
 439  * @param irg  the graph which should be optimized
 440  *
 441  * @return non-zero, if at least one entity was replaced
 442  */
 443 int scalar_replacement_opt(ir_graph *irg);
 444
 445 /** Performs strength reduction for the passed graph. */
 446 void reduce_strength(ir_graph *irg);
 447
 448 /**
 449  * Optimizes tail-recursion calls by converting them into loops.
 450  * Depends on the flag opt_tail_recursion.
 451  * Currently supports the following forms:
 452  *  - return func();
 453  *  - return x + func();
 454  *  - return func() - x;
 455  *  - return x * func();
 456  *  - return -func();
 457  *
 458  * Does not work for Calls that use the exception stuff.
 459  *
 460  * @param irg   the graph to be optimized
 461  *
 462  * @return non-zero if the optimization could be applied, 0 else
 463  */
 464 int opt_tail_rec_irg(ir_graph *irg);
 465
 466 /**
 467  * Optimize tail-recursion calls for all IR-Graphs.
 468  * Can currently handle:
 469  * - direct return value, i.e. return func().
 470  * - additive return value, i.e. return x +/- func()
 471  * - multiplicative return value, i.e. return x * func() or return -func()
 472  *
 473  * The current implementation must be run before optimize_funccalls(),
 474  * because it expects the memory edges pointing to calls, which might be
 475  * removed by optimize_funccalls().
 476  */
 477 void opt_tail_recursion(void);
 478
 479 /** This is the type for a method, that returns a pointer type to
 480  *  tp.  This is needed in the normalization. */
 481 typedef ir_type *(*gen_pointer_type_to_func)(ir_type *tp);
 482
 483 /**  Insert Casts so that class type casts conform exactly with the type hierarchy.
 484  *
 485  *  Formulated in Java, this achieves the following:
 486  *
 487  *  For a class hierarchy
 488  *    class A {}
 489  *    class B extends A {}
 490  *    class C extends B {}
 491  *  we transforms a cast
 492  *    (A)new C()
 493  *  to
 494  *    (A)((B)new C()).
 495  *
 496  *  The algorithm works for Casts with class types, but also for Casts
 497  *  with all pointer types that point (over several indirections,
 498  *  i.e. ***A) to a class type.  Normalizes all graphs.  Computes type
 499  *  information (@see irtypeinfo.h) if not available.
 500  *  Invalidates trout information as new casts are generated.
 501  *
 502  *  @param gppt_fct A function that returns a pointer type that points
 503  *    to the type given as argument.  If this parameter is NULL, a default
 504  *    function is used that either uses trout information or performs a O(n)
 505  *    search to find an existing pointer type.  If it can not find a type,
 506  *    generates a pointer type with mode_P_mach and suffix "cc_ptr_tp".
 507  */
 508 void normalize_irp_class_casts(gen_pointer_type_to_func gppt_fct);
 509
 510
 511 /**  Insert Casts so that class type casts conform exactly with the type hierarchy
 512  *   in given graph.
 513  *
 514  *   For more details see normalize_irp_class_casts().
 515  *
 516  *  This transformation requires that type information is computed. @see irtypeinfo.h.
 517  */
 518 void normalize_irg_class_casts(ir_graph *irg, gen_pointer_type_to_func gppt_fct);
 519
 520
 521 /** Optimize casting between class types.
 522  *
 523  *    class A { m(); }
 524  *    class B extends A { }
 525  *    class C extends B {}
 526  *  Performs the following transformations:
 527  *    C c = (C)(B)(A)(B)new C()  --> C c = (C)(B)newC() --> C c = new C()
 528  *    (Optimizing downcasts as A a = (A)(B)(new A()) --> A a = new A() can
 529  *     be suppressed by setting the flag opt_suppress_downcast_optimization.
 530  *     Downcasting A to B might cause an exception.  It is not clear
 531  *     whether this is modeled by the Firm Cast node, as it has no exception
 532  *     outputs.);
 533  *  If there is inh_m() that overwrites m() in B:
 534  *    ((A) new B()).m()  --> (new B()).inh_m()
 535  *  Phi((A)x, (A)y)  --> (A) Phi (x, y)  if (A) is an upcast.
 536  *
 537  *  Computes type information if not available. @see irtypeinfo.h.
 538  *  Typeinformation is valid after optimization.
 539  *  Invalidates trout information.
 540  */
 541 void optimize_class_casts(void);
 542
 543 /**
 544  * CLiff Click's combo algorithm from "Combining Analyses, combining Optimizations".
 545  *
 546  * Does conditional constant propagation, unreachable code elimination and optimistic
 547  * global value numbering at once.
 548  *
 549  * @param irg  the graph to run on
 550  */
 551 void combo(ir_graph *irg);
 552
 553 /** Inlines all small methods at call sites where the called address comes
 554  *  from a SymConst node that references the entity representing the called
 555  *  method.
 556  *
 557  *  The size argument is a rough measure for the code size of the method:
 558  *  Methods where the obstack containing the firm graph is smaller than
 559  *  size are inlined.  Further only a limited number of calls are inlined.
 560  *  If the method contains more than 1024 inlineable calls none will be
 561  *  inlined.
 562  *  Inlining is only performed if flags `optimize' and `inlineing' are set.
 563  *  The graph may not be in state phase_building.
 564  *  It is recommended to call local_optimize_graph() after inlining as this
 565  *  function leaves a set of obscure Tuple nodes, e.g. a Proj-Tuple-Jmp
 566  *  combination as control flow operation.
 567  */
 568 void inline_small_irgs(ir_graph *irg, int size);
 569
 570
 571 /** Inlineing with a different heuristic than inline_small_irgs().
 572  *
 573  *  Inlines leave functions.  If inlinening creates new leave
 574  *  function inlines these, too. (If g calls f, and f calls leave h,
 575  *  h is first inlined in f and then f in g.)
 576  *
 577  *  Then inlines all small functions (this is not recursive).
 578  *
 579  *  For a heuristic this inlineing uses firm node counts.  It does
 580  *  not count auxiliary nodes as Proj, Tuple, End, Start, Id, Sync.
 581  *  If the ignore_runtime flag is set, calls to functions marked with the
 582  *  mtp_property_runtime property are ignored.
 583  *
 584  *  @param maxsize         Do not inline any calls if a method has more than
 585  *                         maxsize firm nodes.  It may reach this limit by
 586  *                         inlineing.
 587  *  @param leavesize       Inline leave functions if they have less than leavesize
 588  *                         nodes.
 589  *  @param size            Inline all function smaller than size.
 590  *  @param ignore_runtime  count a function only calling runtime functions as
 591  *                         leave
 592  */
 593 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
 594                 unsigned size, int ignore_runtime);
 595
 596 /**
 597  * Heuristic inliner. Calculates a benefice value for every call and inlines
 598  * those calls with a value higher than the threshold.
 599  *
 600  * @param maxsize      Do not inline any calls if a method has more than
 601  *                     maxsize firm nodes.  It may reach this limit by
 602  *                     inlineing.
 603  * @param threshold    inlining threshold
 604  */
 605 void inline_functions(unsigned maxsize, int inline_threshold);
 606
 607 /**
 608  * Combines congruent blocks into one.
 609  *
 610  * @param irg   The IR-graph to optimize.
 611  *
 612  * @return non-zero if the graph was transformed
 613  */
 614 int shape_blocks(ir_graph *irg);
 615
 616 #endif