X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fir%2Firgopt.c;h=40d5084f3e2211a016377730777f226c6a3073f9;hb=0e9850eda71010d0741347734e6dcacd7c818461;hp=c8ccc9064d459b7f9d48e5cfef73c4f4b93f0dec;hpb=c5b429cfffe54eaa93b55a60e70854c706fc612e;p=libfirm diff --git a/ir/ir/irgopt.c b/ir/ir/irgopt.c index c8ccc9064..40d5084f3 100644 --- a/ir/ir/irgopt.c +++ b/ir/ir/irgopt.c @@ -1,18 +1,22 @@ -/* Coyright (C) 1998 - 2002 by Universitaet Karlsruhe -** All rights reserved. -** -** Author: Christian Schaefer, Goetz Lindenmaier, Sebastian Felis -** -** Optimizations for a whole ir graph, i.e., a procedure. -*/ +/* + * Project: libFIRM + * File name: ir/ir/irgopt.c + * Purpose: Optimizations for a whole ir graph, i.e., a procedure. + * Author: Christian Schaefer, Goetz Lindenmaier + * Modified by: Sebastian Felis + * Created: + * CVS-ID: $Id$ + * Copyright: (c) 1998-2003 Universität Karlsruhe + * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE. + */ -/* $Id$ */ #ifdef HAVE_CONFIG_H # include #endif # include +# include # include "irprog.h" # include "irgopt.h" @@ -21,10 +25,10 @@ # include "iropt_t.h" # include "irgwalk.h" # include "ircons.h" -# include "misc.h" # include "irgmod.h" # include "array.h" # include "pset.h" +# include "eset.h" # include "pdeq.h" /* Fuer code placement */ # include "irouts.h" # include "irloop.h" @@ -39,17 +43,25 @@ void add_identities (pset *value_table, ir_node *node); /* apply optimizations of iropt to all nodes. */ /********************************************************************/ -void init_link (ir_node *n, void *env) { +static void init_link (ir_node *n, void *env) { set_irn_link(n, NULL); } -void +#if 0 /* Old version. Avoids Ids. + This is not necessary: we do a postwalk, and get_irn_n + removes ids anyways. So it's much cheaper to call the + optimization less often and use the exchange() algorithm. */ +static void optimize_in_place_wrapper (ir_node *n, void *env) { - int i; - ir_node *optimized; - - for (i = 0; i < get_irn_arity(n); i++) { - optimized = optimize_in_place_2(get_irn_n(n, i)); + int i, irn_arity; + ir_node *optimized, *old; + + irn_arity = get_irn_arity(n); + for (i = 0; i < irn_arity; i++) { + /* get_irn_n skips Id nodes, so comparison old != optimized does not + show all optimizations. Therefore always set new predecessor. */ + old = get_irn_n(n, i); + optimized = optimize_in_place_2(old); set_irn_n(n, i, optimized); } @@ -58,6 +70,15 @@ optimize_in_place_wrapper (ir_node *n, void *env) { if (optimized != n) exchange (n, optimized); } } +#else +static void +optimize_in_place_wrapper (ir_node *n, void *env) { + ir_node *optimized = optimize_in_place_2(n); + if (optimized != n) exchange (n, optimized); +} +#endif + + void local_optimize_graph (ir_graph *irg) { @@ -89,14 +110,14 @@ local_optimize_graph (ir_graph *irg) { /********************************************************************/ /* Remeber the new node in the old node by using a field all nodes have. */ -INLINE void +static INLINE void set_new_node (ir_node *old, ir_node *new) { old->link = new; } /* Get this new node, before the old node is forgotton.*/ -INLINE ir_node * +static INLINE ir_node * get_new_node (ir_node * n) { return n->link; @@ -108,9 +129,9 @@ get_new_node (ir_node * n) Remembering the arity is useful, as it saves a lot of pointer accesses. This function is called for all Phi and Block nodes in a Block. 
*/ -INLINE int +static INLINE int compute_new_arity(ir_node *b) { - int i, res; + int i, res, irn_arity; int irg_v, block_v; irg_v = get_irg_block_visited(current_ir_graph); @@ -121,8 +142,8 @@ compute_new_arity(ir_node *b) { return block_v - irg_v; } else { /* compute the number of good predecessors */ - res = get_irn_arity(b); - for (i = 0; i < get_irn_arity(b); i++) + res = irn_arity = get_irn_arity(b); + for (i = 0; i < irn_arity; i++) if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--; /* save it in the flag. */ set_Block_block_visited(b, irg_v + res); @@ -157,6 +178,11 @@ copy_node (ir_node *n, void *env) { ir_node *nn, *block; int new_arity; + /* The end node looses it's flexible in array. This doesn't matter, + as dead node elimination builds End by hand, inlineing doesn't use + the End node. */ + //assert(n->op == op_End || ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); + if (get_irn_opcode(n) == iro_Block) { block = NULL; new_arity = compute_new_arity(n); @@ -193,7 +219,7 @@ copy_node (ir_node *n, void *env) { static void copy_preds (ir_node *n, void *env) { ir_node *nn, *block; - int i, j; + int i, j, irn_arity; nn = get_new_node(n); @@ -204,10 +230,11 @@ copy_preds (ir_node *n, void *env) { if (get_irn_opcode(n) == iro_Block) { /* Don't copy Bad nodes. */ j = 0; - for (i = 0; i < get_irn_arity(n); i++) + irn_arity = get_irn_arity(n); + for (i = 0; i < irn_arity; i++) if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) { set_irn_n (nn, j, get_new_node(get_irn_n(n, i))); - //if (is_backedge(n, i)) set_backedge(nn, j); + /*if (is_backedge(n, i)) set_backedge(nn, j);*/ j++; } /* repair the block visited flag from above misuse. Repair it in both @@ -218,7 +245,7 @@ copy_preds (ir_node *n, void *env) { in array contained Bads. Now it's possible. We don't call optimize_in_place as it requires that the fields in ir_graph are set properly. */ - if ((get_opt_control_flow()) && + if ((get_opt_control_flow_straightening()) && (get_Block_n_cfgpreds(nn) == 1) && (get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)) exchange(nn, get_nodes_Block(get_Block_cfgpred(nn, 0))); @@ -228,10 +255,11 @@ copy_preds (ir_node *n, void *env) { block = get_nodes_Block(n); set_irn_n (nn, -1, get_new_node(block)); j = 0; - for (i = 0; i < get_irn_arity(n); i++) + irn_arity = get_irn_arity(n); + for (i = 0; i < irn_arity; i++) if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) { set_irn_n (nn, j, get_new_node(get_irn_n(n, i))); - //if (is_backedge(n, i)) set_backedge(nn, j); + /*if (is_backedge(n, i)) set_backedge(nn, j);*/ j++; } /* If the pre walker reached this Phi after the post walker visited the @@ -242,7 +270,8 @@ copy_preds (ir_node *n, void *env) { if (get_irn_arity(n) == 1) exchange(n, get_irn_n(n, 0)); } else { - for (i = -1; i < get_irn_arity(n); i++) + irn_arity = get_irn_arity(n); + for (i = -1; i < irn_arity; i++) set_irn_n (nn, i, get_new_node(get_irn_n(n, i))); } /* Now the new node is complete. We can add it to the hash table for cse. @@ -253,10 +282,10 @@ copy_preds (ir_node *n, void *env) { /* Copies the graph recursively, compacts the keepalive of the end node. */ static void -copy_graph () { +copy_graph (void) { ir_node *oe, *ne; /* old end, new end */ ir_node *ka; /* keep alive */ - int i; + int i, irn_arity; oe = get_irg_end(current_ir_graph); /* copy the end node by hand, allocate dynamic in array! */ @@ -279,7 +308,8 @@ copy_graph () { /** ... and now the keep alives. **/ /* First pick the not marked block nodes and walk them. 
We must pick these first as else we will oversee blocks reachable from Phis. */ - for (i = 0; i < get_irn_arity(oe); i++) { + irn_arity = get_irn_arity(oe); + for (i = 0; i < irn_arity; i++) { ka = get_irn_n(oe, i); if ((get_irn_op(ka) == op_Block) && (get_irn_visited(ka) < get_irg_visited(current_ir_graph))) { @@ -291,7 +321,8 @@ copy_graph () { } /* Now pick the Phis. Here we will keep all! */ - for (i = 0; i < get_irn_arity(oe); i++) { + irn_arity = get_irn_arity(oe); + for (i = 0; i < irn_arity; i++) { ka = get_irn_n(oe, i); if ((get_irn_op(ka) == op_Phi)) { if (get_irn_visited(ka) < get_irg_visited(current_ir_graph)) { @@ -308,8 +339,8 @@ copy_graph () { in current_ir_graph and fixes the environment. Then fixes the fields in current_ir_graph containing nodes of the graph. */ -void -copy_graph_env () { +static void +copy_graph_env (void) { ir_node *old_end; /* Not all nodes remembered in current_ir_graph might be reachable from the end node. Assure their link is set to NULL, so that @@ -353,11 +384,13 @@ copy_graph_env () { copy_preds(get_irg_bad(current_ir_graph), NULL); } set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph))); + /* GL removed: we need unknown with mode for analyses. if (get_irn_link(get_irg_unknown(current_ir_graph)) == NULL) { copy_node(get_irg_unknown(current_ir_graph), NULL); copy_preds(get_irg_unknown(current_ir_graph), NULL); } set_irg_unknown(current_ir_graph, get_new_node(get_irg_unknown(current_ir_graph))); + */ } /* Copies all reachable nodes to a new obstack. Removes bad inputs @@ -379,6 +412,7 @@ dead_node_elimination(ir_graph *irg) { /* Handle graph state */ assert(get_irg_phase_state(current_ir_graph) != phase_building); + assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none); free_outs(current_ir_graph); /* @@@ so far we loose loops when copying */ @@ -504,10 +538,13 @@ void remove_bad_predecessors(ir_graph *irg) { /* Funcionality for inlining */ /**********************************************************************/ -/* Copy node for inlineing. Copies the node by calling copy_node and - then updates the entity if it's a local one. env must be a pointer - to the frame type of the procedure. The new entities must be in - the link field of the entities. */ +/* Copy node for inlineing. Updates attributes that change when + * inlineing but not for dead node elimination. + * + * Copies the node by calling copy_node and then updates the entity if + * it's a local one. env must be a pointer of the frame type of the + * inlined procedure. The new entities must be in the link field of + * the entities. 
*/ static INLINE void copy_node_inline (ir_node *n, void *env) { ir_node *new; @@ -520,9 +557,13 @@ copy_node_inline (ir_node *n, void *env) { if (get_entity_owner(get_Sel_entity(n)) == frame_tp) { set_Sel_entity(new, get_entity_link(get_Sel_entity(n))); } + } else if (get_irn_op(n) == op_Block) { + new = get_new_node (n); + new->attr.block.irg = current_ir_graph; } } + void inline_method(ir_node *call, ir_graph *called_graph) { ir_node *pre_call; ir_node *post_call, *post_bl; @@ -531,12 +572,12 @@ void inline_method(ir_node *call, ir_graph *called_graph) { ir_node **res_pred; ir_node **cf_pred; ir_node *ret, *phi; - ir_node *cf_op = NULL, *bl; - int arity, n_ret, n_exc, n_res, i, j, rem_opt; + int arity, n_ret, n_exc, n_res, i, j, rem_opt, irn_arity; + int exc_handling; ir_node *proj; type *called_frame; if (!get_optimize() || !get_opt_inline()) return; - /** Turn off optimizations, this can cause problems when allocating new nodes. **/ + /* -- Turn off optimizations, this can cause problems when allocating new nodes. -- */ rem_opt = get_optimize(); set_optimize(0); @@ -547,21 +588,48 @@ void inline_method(ir_node *call, ir_graph *called_graph) { if (get_irg_outs_state(current_ir_graph) == outs_consistent) set_irg_outs_inconsistent(current_ir_graph); - /** Check preconditions **/ + /* -- Check preconditions -- */ assert(get_irn_op(call) == op_Call); - /* @@@ TODO does not work for InterfaceIII.java after cgana + /* @@@ does not work for InterfaceIII.java after cgana assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph))); assert(smaller_type(get_entity_type(get_irg_ent(called_graph)), get_Call_type(call))); */ assert(get_type_tpop(get_Call_type(call)) == type_method); - if (called_graph == current_ir_graph) return; + if (called_graph == current_ir_graph) { + set_optimize(rem_opt); + return; + } + /* -- Decide how to handle exception control flow: Is there a handler + for the Call node, or do we branch directly to End on an exception? + exc_handling: 0 There is a handler. + 1 Branches to End. + 2 Exception handling not represented in Firm. -- */ + exc_handling = 2; + for (proj = (ir_node *)get_irn_link(call); proj; proj = (ir_node *)get_irn_link(proj)) { + assert(get_irn_op(proj) == op_Proj); + if (get_Proj_proj(proj) == pn_Call_M_except) { exc_handling = 0; break;} + if (get_Proj_proj(proj) == pn_Call_X_except) { exc_handling = 1; } + } + + { + ir_node *proj, *Mproj = NULL, *Xproj = NULL; + for (proj = (ir_node *)get_irn_link(call); proj; proj = (ir_node *)get_irn_link(proj)) { + assert(get_irn_op(proj) == op_Proj); + if (get_Proj_proj(proj) == pn_Call_X_except) Xproj = proj; + if (get_Proj_proj(proj) == pn_Call_M_except) Mproj = proj; + } + if (Mproj) { assert(Xproj); exc_handling = 0; } + else if (Xproj) { exc_handling = 1; } + else { exc_handling = 2; } + } - /** Part the Call node into two nodes. Pre_call collects the parameters of + + /* -- the procedure and later replaces the Start node of the called graph. Post_call is the old Call node and collects the results of the called - graph. Both will end up being a tuple. **/ + graph. Both will end up being a tuple. -- */ post_bl = get_nodes_Block(call); set_irg_current_block(current_ir_graph, post_bl); /* XxMxPxP of Start + parameter of Call */ @@ -573,12 +641,12 @@ void inline_method(ir_node *call, ir_graph *called_graph) { pre_call = new_Tuple(5, in); post_call = call; - /** Part the block of the Call node into two blocks. 
+ /* -- The new block gets the ins of the old block, pre_call and all its - predecessors and all Phi nodes. **/ + predecessors and all Phi nodes. -- */ part_block(pre_call); - /** Prepare state for dead node elimination **/ + /* -- Prepare state for dead node elimination -- */ /* Visited flags in calling irg must be >= flag in called irg. Else walker and arity computation will not work. */ if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph)) @@ -601,7 +669,7 @@ void inline_method(ir_node *call, ir_graph *called_graph) { /* Initialize for compaction of in arrays */ inc_irg_block_visited(current_ir_graph); - /*** Replicate local entities of the called_graph ***/ + /* -- Replicate local entities of the called_graph -- */ /* copy the entities. */ called_frame = get_irg_frame_type(called_graph); for (i = 0; i < get_class_n_members(called_frame); i++) { @@ -616,7 +684,7 @@ void inline_method(ir_node *call, ir_graph *called_graph) { to inline, calling this inline will not visit the inlined nodes. */ set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1); - /** Performing dead node elimination inlines the graph **/ + /* -- Performing dead node elimination inlines the graph -- */ /* Copies the nodes to the obstack of current_ir_graph. Updates links to new entities. */ /* @@@ endless loops are not copied!! -- they should be, I think... */ @@ -628,36 +696,40 @@ void inline_method(ir_node *call, ir_graph *called_graph) { set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph)); set_Block_block_visited(get_irg_start_block(called_graph), 0); - /*** Merge the end of the inlined procedure with the call site ***/ + /* -- Merge the end of the inlined procedure with the call site -- */ /* We will turn the old Call node into a Tuple with the following predecessors: - -1: Block of Tuple. - 0: Phi of all Memories of Return statements. - 1: Jmp from new Block that merges the control flow from all exception - predecessors of the old end block. - 2: Tuple of all arguments. - 3: Phi of Exception memories. + -1: Block of Tuple. + 0: Phi of all Memories of Return statements. + 1: Jmp from new Block that merges the control flow from all exception + predecessors of the old end block. + 2: Tuple of all arguments. + 3: Phi of Exception memories. + In case the old Call directly branches to End on an exception we don't + need the block merging all exceptions nor the Phi of the exception + memories. */ - /** Precompute some values **/ + /* -- Precompute some values -- */ end_bl = get_new_node(get_irg_end_block(called_graph)); end = get_new_node(get_irg_end(called_graph)); arity = get_irn_arity(end_bl); /* arity = n_exc + n_ret */ n_res = get_method_n_ress(get_Call_type(call)); res_pred = (ir_node **) malloc (n_res * sizeof (ir_node *)); - cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *)); + cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *)); set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */ - /** archive keepalives **/ - for (i = 0; i < get_irn_arity(end); i++) + /* -- archive keepalives -- */ + irn_arity = get_irn_arity(end); + for (i = 0; i < irn_arity; i++) add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i)); - /* The new end node will die, but the in array is not on the obstack ... */ - free_End(end); - /** Collect control flow from Return blocks to post_calls block. Replace - Return nodes by Jump nodes. **/ + /* The new end node will die. 
We need not free as the in array is on the obstack: + copy_node only generated 'D' arrays. */ + + /* -- Replace Return nodes by Jump nodes. -- */ n_ret = 0; for (i = 0; i < arity; i++) { ir_node *ret; @@ -669,8 +741,8 @@ void inline_method(ir_node *call, ir_graph *called_graph) { } set_irn_in(post_bl, n_ret, cf_pred); - /** Collect results from Return nodes to post_call. Post_call is - turned into a tuple. **/ + /* -- Build a Tuple for all results of the method. + Add Phi node if there was more than one Return. -- */ turn_into_tuple(post_call, 4); /* First the Memory-Phi */ n_ret = 0; @@ -711,83 +783,128 @@ void inline_method(ir_node *call, ir_graph *called_graph) { } else { set_Tuple_pred(call, 2, new_Bad()); } - /* Finally the exception control flow. We need to add a Phi node to + /* Finally the exception control flow. + We have two (three) possible situations: + First if the Call branches to an exception handler: We need to add a Phi node to collect the memory containing the exception objects. Further we need to add another block to get a correct representation of this Phi. To this block we add a Jmp that resolves into the X output of the Call - when the Call is turned into a tuple. */ - n_exc = 0; - for (i = 0; i < arity; i++) { - ir_node *ret; - ret = get_irn_n(end_bl, i); - if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) { - cf_pred[n_exc] = ret; - n_exc++; - } - } - if (n_exc > 0) { - new_Block(n_exc, cf_pred); /* watch it: current_block is changed! */ - set_Tuple_pred(call, 1, new_Jmp()); - /* The Phi for the memories with the exception objects */ + when the Call is turned into a tuple. + Second the Call branches to End, the exception is not handled. Just + add all inlined exception branches to the End node. + Third: there is no Exception edge at all. Handle as case two. */ + if (exc_handler == 0) { n_exc = 0; for (i = 0; i < arity; i++) { ir_node *ret; - ret = skip_Proj(get_irn_n(end_bl, i)); - if (get_irn_op(ret) == op_Call) { - cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3); + ret = get_irn_n(end_bl, i); + if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) { + cf_pred[n_exc] = ret; n_exc++; - } else if (is_fragile_op(ret)) { + } + } + if (n_exc > 0) { + new_Block(n_exc, cf_pred); /* watch it: current_block is changed! */ + set_Tuple_pred(call, 1, new_Jmp()); + /* The Phi for the memories with the exception objects */ + n_exc = 0; + for (i = 0; i < arity; i++) { + ir_node *ret; + ret = skip_Proj(get_irn_n(end_bl, i)); + if (get_irn_op(ret) == op_Call) { + cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3); + n_exc++; + } else if (is_fragile_op(ret)) { /* We rely that all cfops have the memory output at the same position. 
*/ - cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0); - n_exc++; - } else if (get_irn_op(ret) == op_Raise) { - cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1); - n_exc++; + cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0); + n_exc++; + } else if (get_irn_op(ret) == op_Raise) { + cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1); + n_exc++; + } } + set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M)); + } else { + set_Tuple_pred(call, 1, new_Bad()); + set_Tuple_pred(call, 3, new_Bad()); } - set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M)); } else { + ir_node *main_end_bl; + int main_end_bl_arity; + ir_node **end_preds; + + /* assert(exc_handler == 1 || no exceptions. ) */ + n_exc = 0; + for (i = 0; i < arity; i++) { + ir_node *ret = get_irn_n(end_bl, i); + + if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) { + cf_pred[n_exc] = ret; + n_exc++; + } + } + main_end_bl = get_irg_end_block(current_ir_graph); + main_end_bl_arity = get_irn_arity(main_end_bl); + end_preds = (ir_node **) malloc ((n_exc + main_end_bl_arity) * sizeof (ir_node *)); + + for (i = 0; i < main_end_bl_arity; ++i) + end_preds[i] = get_irn_n(main_end_bl, i); + for (i = 0; i < n_exc; ++i) + end_preds[main_end_bl_arity + i] = cf_pred[i]; + set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds); set_Tuple_pred(call, 1, new_Bad()); set_Tuple_pred(call, 3, new_Bad()); + free(end_preds); } free(res_pred); free(cf_pred); - /*** Correct the control flow to the end node. - If the exception control flow from the Call directly branched to the - end block we now have the following control flow predecessor pattern: - ProjX -> Tuple -> Jmp. - We must remove the Jmp along with it's empty block and add Jmp's - predecessors as predecessors of this end block. ***/ - /* find the problematic predecessor of the end block. */ - end_bl = get_irg_end_block(current_ir_graph); - for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) { - cf_op = get_Block_cfgpred(end_bl, i); - if (get_irn_op(cf_op) == op_Proj) { - cf_op = get_Proj_pred(cf_op); - if (get_irn_op(cf_op) == op_Tuple) { - cf_op = get_Tuple_pred(cf_op, 1); - assert(get_irn_op(cf_op) == op_Jmp); - break; +#if 0 /* old. now better, correcter, faster implementation. */ + if (n_exc > 0) { + /* -- If the exception control flow from the inlined Call directly + branched to the end block we now have the following control + flow predecessor pattern: ProjX -> Tuple -> Jmp. We must + remove the Jmp along with it's empty block and add Jmp's + predecessors as predecessors of this end block. No problem if + there is no exception, because then branches Bad to End which + is fine. -- + @@@ can't we know this beforehand: by getting the Proj(1) from + the Call link list and checking whether it goes to Proj. */ + /* find the problematic predecessor of the end block. 
*/ + end_bl = get_irg_end_block(current_ir_graph); + for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) { + cf_op = get_Block_cfgpred(end_bl, i); + if (get_irn_op(cf_op) == op_Proj) { + cf_op = get_Proj_pred(cf_op); + if ((get_irn_op(cf_op) == op_Tuple) && (cf_op == call)) { + // There are unoptimized tuples from inlineing before when no exc + assert(get_Proj_proj(get_Block_cfgpred(end_bl, i)) == pn_Call_X_except); + cf_op = get_Tuple_pred(cf_op, pn_Call_X_except); + assert(get_irn_op(cf_op) == op_Jmp); + break; + } } } + /* repair */ + if (i < get_Block_n_cfgpreds(end_bl)) { + bl = get_nodes_Block(cf_op); + arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1; + cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *)); + for (j = 0; j < i; j++) + cf_pred[j] = get_Block_cfgpred(end_bl, j); + for (j = j; j < i + get_Block_n_cfgpreds(bl); j++) + cf_pred[j] = get_Block_cfgpred(bl, j-i); + for (j = j; j < arity; j++) + cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1); + set_irn_in(end_bl, arity, cf_pred); + free(cf_pred); + // Remove the exception pred from post-call Tuple. + set_Tuple_pred(call, pn_Call_X_except, new_Bad()); + } } - /* repair */ - if (i < get_Block_n_cfgpreds(end_bl)) { - bl = get_nodes_Block(cf_op); - arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1; - cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *)); - for (j = 0; j < i; j++) - cf_pred[j] = get_Block_cfgpred(end_bl, j); - for (j = j; j < i + get_Block_n_cfgpreds(bl); j++) - cf_pred[j] = get_Block_cfgpred(bl, j-i); - for (j = j; j < arity; j++) - cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1); - set_irn_in(end_bl, arity, cf_pred); - free(cf_pred); - } +#endif - /** Turn cse back on. **/ + /* -- Turn cse back on. -- */ set_optimize(rem_opt); } @@ -801,7 +918,27 @@ static int pos; I didn't get a version with NEW_ARR_F to run. */ #define MAX_INLINE 1024 +/* given an Call node, returns the irg called. NULL if not + * known. */ +static ir_graph *get_call_called_irg(ir_node *call) { + ir_node *addr; + tarval *tv; + ir_graph *called_irg = NULL; + + assert(get_irn_op(call) == op_Call); + + addr = get_Call_ptr(call); + if (get_irn_op(addr) == op_Const) { + /* Check whether the constant is the pointer to a compiled entity. */ + tv = get_Const_tarval(addr); + if (tarval_to_entity(tv)) + called_irg = get_entity_irg(tarval_to_entity(tv)); + } + return called_irg; +} + static void collect_calls(ir_node *call, void *env) { + ir_node **calls = (ir_node **)env; ir_node *addr; tarval *tv; @@ -813,12 +950,12 @@ static void collect_calls(ir_node *call, void *env) { if (get_irn_op(addr) == op_Const) { /* Check whether the constant is the pointer to a compiled entity. */ tv = get_Const_tarval(addr); - if (tv->u.P.ent) { - called_irg = get_entity_irg(tv->u.P.ent); + if (tarval_to_entity(tv)) { + called_irg = get_entity_irg(tarval_to_entity(tv)); if (called_irg && pos < MAX_INLINE) { - /* The Call node calls a locally defined method. Remember to inline. */ - calls[pos] = call; - pos++; + /* The Call node calls a locally defined method. Remember to inline. */ + calls[pos] = call; + pos++; } } } @@ -840,6 +977,7 @@ void inline_small_irgs(ir_graph *irg, int size) { current_ir_graph = irg; /* Handle graph state */ assert(get_irg_phase_state(current_ir_graph) != phase_building); + assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none); /* Find Call nodes to inline. 
(We can not inline during a walk of the graph, as inlineing the same @@ -853,13 +991,12 @@ void inline_small_irgs(ir_graph *irg, int size) { /* There are calls to inline */ collect_phiprojs(irg); for (i = 0; i < pos; i++) { - char buf[1024]; tarval *tv; ir_graph *callee; tv = get_Const_tarval(get_Call_ptr(calls[i])); - callee = get_entity_irg(tv->u.P.ent); + callee = get_entity_irg(tarval_to_entity(tv)); if ((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) { - inline_method(calls[i], callee); + inline_method(calls[i], callee); } } } @@ -867,20 +1004,215 @@ void inline_small_irgs(ir_graph *irg, int size) { current_ir_graph = rem; } +/** + * Environment for inlining irgs. + */ +typedef struct { + int n_nodes; /**< Nodes in graph except Id, Tuple, Proj, Start, End */ + int n_nodes_orig; /**< for statistics */ + eset *call_nodes; /**< All call nodes in this graph */ + int n_call_nodes; + int n_call_nodes_orig; /**< for statistics */ + int n_callers; /**< Number of known graphs that call this graphs. */ + int n_callers_orig; /**< for statistics */ +} inline_irg_env; + +static inline_irg_env *new_inline_irg_env(void) { + inline_irg_env *env = malloc(sizeof(inline_irg_env)); + env->n_nodes = -2; /* uncount Start, End */ + env->n_nodes_orig = -2; /* uncount Start, End */ + env->call_nodes = eset_create(); + env->n_call_nodes = 0; + env->n_call_nodes_orig = 0; + env->n_callers = 0; + env->n_callers_orig = 0; + return env; +} + +static void free_inline_irg_env(inline_irg_env *env) { + eset_destroy(env->call_nodes); + free(env); +} + +static void collect_calls2(ir_node *call, void *env) { + inline_irg_env *x = (inline_irg_env *)env; + ir_op *op = get_irn_op(call); + ir_graph *callee; + + /* count nodes in irg */ + if (op != op_Proj && op != op_Tuple && op != op_Sync) { + x->n_nodes++; + x->n_nodes_orig++; + } + + if (op != op_Call) return; + + /* collect all call nodes */ + eset_insert(x->call_nodes, (void *)call); + x->n_call_nodes++; + x->n_call_nodes_orig++; + + /* count all static callers */ + callee = get_call_called_irg(call); + if (callee) { + ((inline_irg_env *)get_irg_link(callee))->n_callers++; + ((inline_irg_env *)get_irg_link(callee))->n_callers_orig++; + } +} + +INLINE static int is_leave(ir_graph *irg) { + return (((inline_irg_env *)get_irg_link(irg))->n_call_nodes == 0); +} + +INLINE static int is_smaller(ir_graph *callee, int size) { + return (((inline_irg_env *)get_irg_link(callee))->n_nodes < size); +} + + +/* Inlines small leave methods at call sites where the called address comes + from a Const node that references the entity representing the called + method. + The size argument is a rough measure for the code size of the method: + Methods where the obstack containing the firm graph is smaller than + size are inlined. */ +void inline_leave_functions(int maxsize, int leavesize, int size) { + inline_irg_env *env; + int i, n_irgs = get_irp_n_irgs(); + ir_graph *rem = current_ir_graph; + int did_inline = 1; + + if (!(get_optimize() && get_opt_inline())) return; + + /* extend all irgs by a temporary data structure for inlineing. */ + for (i = 0; i < n_irgs; ++i) + set_irg_link(get_irp_irg(i), new_inline_irg_env()); + + /* Precompute information in temporary data structure. 
*/ + for (i = 0; i < n_irgs; ++i) { + current_ir_graph = get_irp_irg(i); + assert(get_irg_phase_state(current_ir_graph) != phase_building); + assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none); + + irg_walk(get_irg_end(current_ir_graph), NULL, collect_calls2, + get_irg_link(current_ir_graph)); + env = (inline_irg_env *)get_irg_link(current_ir_graph); + } + + /* and now inline. + Inline leaves recursively -- we might construct new leaves. */ + //int itercnt = 1; + while (did_inline) { + //printf("iteration %d\n", itercnt++); + did_inline = 0; + for (i = 0; i < n_irgs; ++i) { + ir_node *call; + eset *walkset; + int phiproj_computed = 0; + + current_ir_graph = get_irp_irg(i); + env = (inline_irg_env *)get_irg_link(current_ir_graph); + + /* we can not walk and change a set, nor remove from it. + So recompute.*/ + walkset = env->call_nodes; + env->call_nodes = eset_create(); + for (call = eset_first(walkset); call; call = eset_next(walkset)) { + inline_irg_env *callee_env; + ir_graph *callee = get_call_called_irg(call); + + if (env->n_nodes > maxsize) break; + if (callee && is_leave(callee) && is_smaller(callee, leavesize)) { + if (!phiproj_computed) { + phiproj_computed = 1; + collect_phiprojs(current_ir_graph); + } + callee_env = (inline_irg_env *)get_irg_link(callee); +// printf(" %s: Inlineing %s.\n", get_entity_name(get_irg_entity(current_ir_graph)), +// get_entity_name(get_irg_entity(callee))); + inline_method(call, callee); + did_inline = 1; + env->n_call_nodes--; + eset_insert_all(env->call_nodes, callee_env->call_nodes); + env->n_call_nodes += callee_env->n_call_nodes; + env->n_nodes += callee_env->n_nodes; + callee_env->n_callers--; + } else { + eset_insert(env->call_nodes, call); + } + } + eset_destroy(walkset); + } + } + + //printf("Non leaves\n"); + /* inline other small functions. */ + for (i = 0; i < n_irgs; ++i) { + ir_node *call; + eset *walkset; + int phiproj_computed = 0; + + current_ir_graph = get_irp_irg(i); + env = (inline_irg_env *)get_irg_link(current_ir_graph); + + /* we can not walk and change a set, nor remove from it. 
+ So recompute.*/ + walkset = env->call_nodes; + env->call_nodes = eset_create(); + for (call = eset_first(walkset); call; call = eset_next(walkset)) { + inline_irg_env *callee_env; + ir_graph *callee = get_call_called_irg(call); + + if (env->n_nodes > maxsize) break; + if (callee && is_smaller(callee, size)) { + if (!phiproj_computed) { + phiproj_computed = 1; + collect_phiprojs(current_ir_graph); + } + callee_env = (inline_irg_env *)get_irg_link(callee); +// printf(" %s: Inlineing %s.\n", get_entity_name(get_irg_entity(current_ir_graph)), +// get_entity_name(get_irg_entity(callee))); + inline_method(call, callee); + did_inline = 1; + env->n_call_nodes--; + eset_insert_all(env->call_nodes, callee_env->call_nodes); + env->n_call_nodes += callee_env->n_call_nodes; + env->n_nodes += callee_env->n_nodes; + callee_env->n_callers--; + } else { + eset_insert(env->call_nodes, call); + } + } + eset_destroy(walkset); + } + + for (i = 0; i < n_irgs; ++i) { + current_ir_graph = get_irp_irg(i); +#if 0 + env = (inline_irg_env *)get_irg_link(current_ir_graph); + if ((env->n_call_nodes_orig != env->n_call_nodes) || + (env->n_callers_orig != env->n_callers)) + printf("Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n", + env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes, + env->n_callers_orig, env->n_callers, + get_entity_name(get_irg_entity(current_ir_graph))); +#endif + free_inline_irg_env((inline_irg_env *)get_irg_link(current_ir_graph)); + } + + current_ir_graph = rem; +} /********************************************************************/ -/* Code Placement. Pinns all floating nodes to a block where they */ +/* Code Placement. Pins all floating nodes to a block where they */ /* will be executed only if needed. */ /********************************************************************/ -static pdeq *worklist; /* worklist of ir_node*s */ - /* Find the earliest correct block for N. --- Place N into the same Block as its dominance-deepest Input. */ static void -place_floats_early (ir_node *n) +place_floats_early(ir_node *n, pdeq *worklist) { - int i, start; + int i, start, irn_arity; /* we must not run into an infinite loop */ assert (irn_not_visited(n)); @@ -895,19 +1227,21 @@ place_floats_early (ir_node *n) if ((get_irn_op(n) == op_Const) || (get_irn_op(n) == op_SymConst) || - (is_Bad(n))) { + (is_Bad(n)) || + (get_irn_op(n) == op_Unknown)) { /* These nodes will not be placed by the loop below. */ b = get_irg_start_block(current_ir_graph); depth = 1; } /* find the block for this node. */ - for (i = 0; i < get_irn_arity(n); i++) { + irn_arity = get_irn_arity(n); + for (i = 0; i < irn_arity; i++) { ir_node *dep = get_irn_n(n, i); ir_node *dep_block; if ((irn_not_visited(dep)) && (get_op_pinned(get_irn_op(dep)) == floats)) { - place_floats_early (dep); + place_floats_early(dep, worklist); } /* Because all loops contain at least one pinned node, now all our inputs are either pinned or place_early has already @@ -930,7 +1264,8 @@ place_floats_early (ir_node *n) /* Add predecessors of non floating nodes on worklist. */ start = (get_irn_op(n) == op_Block) ? 0 : -1; - for (i = start; i < get_irn_arity(n); i++) { + irn_arity = get_irn_arity(n); + for (i = start; i < irn_arity; i++) { ir_node *pred = get_irn_n(n, i); if (irn_not_visited(pred)) { pdeq_putr (worklist, pred); @@ -942,17 +1277,17 @@ place_floats_early (ir_node *n) Start, Call and end at pinned nodes as Store, Call. 
Place_early places all floating nodes reachable from its argument through floating nodes and adds all beginnings at pinned nodes to the worklist. */ -INLINE void place_early () { +static INLINE void place_early(pdeq* worklist) { assert(worklist); inc_irg_visited(current_ir_graph); /* this inits the worklist */ - place_floats_early (get_irg_end(current_ir_graph)); + place_floats_early(get_irg_end(current_ir_graph), worklist); /* Work the content of the worklist. */ while (!pdeq_empty (worklist)) { ir_node *n = pdeq_getl (worklist); - if (irn_not_visited(n)) place_floats_early (n); + if (irn_not_visited(n)) place_floats_early(n, worklist); } set_irg_outs_inconsistent(current_ir_graph); @@ -969,11 +1304,12 @@ consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer) /* Compute the latest block into which we can place a node so that it is before consumer. */ if (get_irn_op(consumer) == op_Phi) { - /* our comsumer is a Phi-node, the effective use is in all those + /* our consumer is a Phi-node, the effective use is in all those blocks through which the Phi-node reaches producer */ - int i; + int i, irn_arity; ir_node *phi_block = get_nodes_Block(consumer); - for (i = 0; i < get_irn_arity(consumer); i++) { + irn_arity = get_irn_arity(consumer); + for (i = 0; i < irn_arity; i++) { if (get_irn_n(consumer, i) == producer) { block = get_nodes_Block(get_Block_cfgpred(phi_block, i)); } @@ -996,7 +1332,7 @@ consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer) return dca; } -INLINE int get_irn_loop_depth(ir_node *n) { +static INLINE int get_irn_loop_depth(ir_node *n) { return get_loop_depth(get_irn_loop(n)); } @@ -1036,10 +1372,10 @@ move_out_of_loops (ir_node *n, ir_node *early) `optimal' Block between the latest and earliest legal block. The `optimal' block is the dominance-deepest block of those with the least loop-nesting-depth. This places N out of as many - loops as possible and then makes it as controldependant as + loops as possible and then makes it as control dependant as possible. */ static void -place_floats_late (ir_node *n) +place_floats_late(ir_node *n, pdeq *worklist) { int i; ir_node *early; @@ -1050,13 +1386,13 @@ place_floats_late (ir_node *n) if ((get_irn_op(n) != op_Block) && (!is_cfop(n)) && (get_irn_mode(n) != mode_X)) { - /* Remember the early palacement of this block to move it + /* Remember the early placement of this block to move it out of loop no further than the early placement. */ early = get_nodes_Block(n); /* Assure that our users are all placed, except the Phi-nodes. - --- Each dataflow cycle contains at least one Phi-node. We + --- Each data flow cycle contains at least one Phi-node. We have to break the `user has to be placed before the - producer' dependance cycle and the Phi-nodes are the + producer' dependence cycle and the Phi-nodes are the place to do so, because we need to base our placement on the final region of our users, which is OK with Phi-nodes, as they are pinned, and they never have to be placed after a @@ -1064,7 +1400,7 @@ place_floats_late (ir_node *n) for (i = 0; i < get_irn_n_outs(n); i++) { ir_node *succ = get_irn_out(n, i); if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi)) - place_floats_late (succ); + place_floats_late(succ, worklist); } /* We have to determine the final block of this node... 
except for @@ -1096,21 +1432,23 @@ place_floats_late (ir_node *n) } } -INLINE void place_late() { +static INLINE void place_late(pdeq* worklist) { assert(worklist); inc_irg_visited(current_ir_graph); /* This fills the worklist initially. */ - place_floats_late(get_irg_start_block(current_ir_graph)); + place_floats_late(get_irg_start_block(current_ir_graph), worklist); /* And now empty the worklist again... */ while (!pdeq_empty (worklist)) { ir_node *n = pdeq_getl (worklist); - if (irn_not_visited(n)) place_floats_late(n); + if (irn_not_visited(n)) place_floats_late(n, worklist); } } void place_code(ir_graph *irg) { + pdeq* worklist; ir_graph *rem = current_ir_graph; + current_ir_graph = irg; if (!(get_optimize() && get_opt_global_cse())) return; @@ -1124,17 +1462,17 @@ void place_code(ir_graph *irg) { /* Place all floating nodes as early as possible. This guarantees a legal code placement. */ - worklist = new_pdeq (); - place_early(); + worklist = new_pdeq(); + place_early(worklist); /* place_early invalidates the outs, place_late needs them. */ compute_outs(irg); /* Now move the nodes down in the dominator tree. This reduces the unnecessary executions of the node. */ - place_late(); + place_late(worklist); set_irg_outs_inconsistent(current_ir_graph); - del_pdeq (worklist); + del_pdeq(worklist); current_ir_graph = rem; } @@ -1144,10 +1482,13 @@ void place_code(ir_graph *irg) { /* Control flow optimization. */ /* Removes Bad control flow predecessors and empty blocks. A block */ /* is empty if it contains only a Jmp node. */ -/* */ +/* Blocks can only be removed if they are not needed for the */ +/* semantics of Phi nodes. */ /********************************************************************/ - +/* Removes Tuples from Block control flow predecessors. + Optimizes blocks with equivalent_node(). + Replaces n by Bad if n is unreachable control flow. */ static void merge_blocks(ir_node *n, void *env) { int i; set_irn_link(n, NULL); @@ -1155,21 +1496,33 @@ static void merge_blocks(ir_node *n, void *env) { if (get_irn_op(n) == op_Block) { /* Remove Tuples */ for (i = 0; i < get_Block_n_cfgpreds(n); i++) - set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i))); - } else if (get_irn_mode(n) == mode_X) { + /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through. + A different order of optimizations might cause problems. */ + if (get_opt_normalize()) + set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i))); + } else if (get_optimize() && (get_irn_mode(n) == mode_X)) { /* We will soon visit a block. Optimize it before visiting! */ ir_node *b = get_nodes_Block(n); - ir_node *new = equivalent_node(b); - while (irn_not_visited(b) && (!is_Bad(new)) && (new != b)) { - /* We would have to run gigo if new is bad. */ - if (get_optimize() && get_opt_control_flow()) exchange (b, new); - b = new; - new = equivalent_node(b); + ir_node *new_node = equivalent_node(b); + while (irn_not_visited(b) && (!is_Bad(new_node)) && (new_node != b)) { + /* We would have to run gigo if new is bad, so we + promote it directly below. */ + assert(((b == new_node) || + get_opt_control_flow_straightening() || + get_opt_control_flow_weak_simplification()) && + ("strange flag setting")); + exchange (b, new_node); + b = new_node; + new_node = equivalent_node(b); } - if (is_Bad(new)) exchange (n, new_Bad()); + /* GL @@@ get_opt_normalize hinzugefuegt, 5.5.2003 */ + if (is_Bad(new_node) && get_opt_normalize()) exchange(n, new_Bad()); } } +/* Collects all Phi nodes in link list of Block. 
+ Marks all blocks "block_visited" if they contain a node other + than Jmp. */ static void collect_nodes(ir_node *n, void *env) { if (is_no_Block(n)) { ir_node *b = get_nodes_Block(n); @@ -1185,7 +1538,7 @@ static void collect_nodes(ir_node *n, void *env) { } /* Returns true if pred is pred of block */ -int is_pred_of(ir_node *pred, ir_node *b) { +static int is_pred_of(ir_node *pred, ir_node *b) { int i; for (i = 0; i < get_Block_n_cfgpreds(b); i++) { ir_node *b_pred = get_nodes_Block(get_Block_cfgpred(b, i)); @@ -1194,7 +1547,7 @@ int is_pred_of(ir_node *pred, ir_node *b) { return 0; } -int test_whether_dispensable(ir_node *b, int pos) { +static int test_whether_dispensable(ir_node *b, int pos) { int i, j, n_preds = 1; int dispensable = 1; ir_node *cfop = get_Block_cfgpred(b, pos); @@ -1202,7 +1555,7 @@ int test_whether_dispensable(ir_node *b, int pos) { if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) { - if (!get_optimize() || !get_opt_control_flow()) { + if (!get_optimize() || !get_opt_control_flow_strong_simplification()) { /* Mark block so that is will not be removed. */ set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1); return 1; @@ -1242,7 +1595,7 @@ int test_whether_dispensable(ir_node *b, int pos) { return n_preds; } -void optimize_blocks(ir_node *b, void *env) { +static void optimize_blocks(ir_node *b, void *env) { int i, j, k, max_preds, n_preds; ir_node *pred, *phi; ir_node **in; @@ -1255,7 +1608,7 @@ void optimize_blocks(ir_node *b, void *env) { } in = (ir_node **) malloc(max_preds * sizeof(ir_node *)); - /** Debug output ** +/** printf(" working on "); DDMN(b); for (i = 0; i < get_Block_n_cfgpreds(b); i++) { pred = get_nodes_Block(get_Block_cfgpred(b, i)); @@ -1266,7 +1619,7 @@ void optimize_blocks(ir_node *b, void *env) { printf(" removing pred %i ", i); DDMN(pred); } else { printf(" Nothing to do for "); DDMN(pred); } } - ** end Debug output **/ + * end Debug output **/ /** Fix the Phi nodes **/ phi = get_irn_link(b); @@ -1291,15 +1644,18 @@ void optimize_blocks(ir_node *b, void *env) { } n_preds++; } -#if 0 - /* @@@ hier brauche ich schleifeninformation!!! Wenn keine Rueckwaertskante - dann darfs auch keine Verwendung geben. */ + /* The Phi_pred node is replaced now if it is a Phi. + In Schleifen kann offenbar der entfernte Phi Knoten legal verwendet werden. + Daher muss der Phiknoten durch den neuen ersetzt werden. + Weiter muss der alte Phiknoten entfernt werden (durch ersetzen oder + durch einen Bad) damit er aus den keep_alive verschwinden kann. + Man sollte also, falls keine Schleife vorliegt, exchange mit new_Bad + aufrufen. */ if (get_nodes_Block(phi_pred) == pred) { /* remove the Phi as it might be kept alive. Further there might be other users. */ - exchange(phi_pred, phi); /* geht, is aber doch semantisch falsch! */ + exchange(phi_pred, phi); /* geht, ist aber doch semantisch falsch! Warum?? */ } -#endif } else { in[n_preds] = get_Phi_pred(phi, i); n_preds ++; @@ -1307,10 +1663,11 @@ void optimize_blocks(ir_node *b, void *env) { } /* Fix the node */ set_irn_in(phi, n_preds, in); + phi = get_irn_link(phi); } - /** Move Phi nodes from removed blocks to this one. +/** This happens only if merge between loop backedge and single loop entry. **/ for (k = 0; k < get_Block_n_cfgpreds(b); k++) { pred = get_nodes_Block(get_Block_cfgpred(b, k)); @@ -1330,7 +1687,7 @@ void optimize_blocks(ir_node *b, void *env) { < get_irg_block_visited(current_ir_graph)) { /* It's an empty block and not yet visited. 
*/ for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { - /* @@@ Hier brauche ich schleifeninformation!!! Kontrllflusskante + /* @@@ Hier brauche ich Schleifeninformation!!! Kontrollflusskante muss Rueckwaertskante sein! (An allen vier in[n_preds] = phi Anweisungen.) Trotzdem tuts bisher!! */ in[n_preds] = phi; @@ -1401,7 +1758,6 @@ void optimize_cf(ir_graph *irg) { ir_graph *rem = current_ir_graph; current_ir_graph = irg; - /* Handle graph state */ assert(get_irg_phase_state(irg) != phase_building); if (get_irg_outs_state(current_ir_graph) == outs_consistent) @@ -1420,7 +1776,6 @@ void optimize_cf(ir_graph *irg) { for end if useful. */ in = NEW_ARR_F (ir_node *, 1); in[0] = get_nodes_Block(end); - inc_irg_visited(current_ir_graph); for(i = 0; i < get_End_n_keepalives(end); i++) { ir_node *ka = get_End_keepalive(end, i); @@ -1442,3 +1797,55 @@ void optimize_cf(ir_graph *irg) { current_ir_graph = rem; } + + +/** + * Called by walker of remove_critical_cf_edges. + * + * Place an empty block to an edge between a blocks of multiple + * predecessors and a block of multiple successors. + * + * @param n IR node + * @param env Environment of walker. This field is unused and has + * the value NULL. + */ +static void walk_critical_cf_edges(ir_node *n, void *env) { + int arity, i; + ir_node *pre, *block, **in, *jmp; + + /* Block has multiple predecessors */ + if ((op_Block == get_irn_op(n)) && + (get_irn_arity(n) > 1)) { + arity = get_irn_arity(n); + + if (n == get_irg_end_block(current_ir_graph)) + return; // No use to add a block here. + + for (i=0; iobst, 1); + /* set predecessor of new block */ + in[0] = pre; + block = new_Block(1, in); + /* insert new jmp node to new block */ + switch_block(block); + jmp = new_Jmp(); + switch_block(n); + /* set successor of new block */ + set_irn_n(n, i, jmp); + + } /* predecessor has multiple successors */ + } /* for all predecessors */ + } /* n is a block */ +} + +void remove_critical_cf_edges(ir_graph *irg) { + if (get_opt_critical_edges()) + irg_walk_graph(irg, NULL, walk_critical_cf_edges, NULL); +}
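
The hunks above add or rework several whole-graph passes: local_optimize_graph(), dead_node_elimination(), inline_small_irgs(), inline_leave_functions(), place_code(), optimize_cf() and remove_critical_cf_edges(). A minimal driver sketch follows; it is not part of the patch, but shows one plausible way a caller could sequence these passes over all graphs of the program. The pass order, the helper name optimize_all_irgs and the three size arguments handed to inline_leave_functions() are illustrative assumptions, not values prescribed by libFirm.

#include "irprog.h"
#include "irgopt.h"

/* Illustrative driver (assumption, not from the patch): run the irgopt.c
   passes over every graph registered in the program. */
static void optimize_all_irgs(void) {
  int i, n_irgs = get_irp_n_irgs();

  /* Local optimizations and dead node elimination, one graph at a time. */
  for (i = 0; i < n_irgs; ++i) {
    ir_graph *irg = get_irp_irg(i);
    local_optimize_graph(irg);       /* apply iropt to all nodes */
    dead_node_elimination(irg);      /* copy live nodes to a new obstack */
  }

  /* Inline small leave methods across the whole program.  The three size
     limits are made-up example values, not recommended defaults. */
  inline_leave_functions(500, 80, 30);

  /* Clean up control flow and place floating nodes.  place_code() only
     does something if optimization and global cse are enabled. */
  for (i = 0; i < n_irgs; ++i) {
    ir_graph *irg = get_irp_irg(i);
    optimize_cf(irg);                /* remove Bads and empty blocks */
    place_code(irg);                 /* pin floating nodes late */
    remove_critical_cf_edges(irg);   /* split critical control flow edges */
  }
}

Note that place_code() relies on dominance and loop information (it uses get_Block_dom_depth and get_irn_loop), so a real driver would have to ensure those analyses are up to date before calling it; how that is triggered is not shown in this diff.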