X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Floop.c;h=8e7fae7e6e66c939bf93a6feab9649dcd718b3ac;hb=9fbdcb82b2911748b70d5294195c248d4ee0edaf;hp=ae005ac058052323d6fc61ca04a6ad647a3cf046;hpb=46fcd214e2431ac8ab60ced40a51aa16b798f638;p=libfirm diff --git a/ir/opt/loop.c b/ir/opt/loop.c index ae005ac05..8e7fae7e6 100644 --- a/ir/opt/loop.c +++ b/ir/opt/loop.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2010 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -22,11 +22,11 @@ * @author Christian Helmer * @brief loop inversion and loop unrolling * - * @version $Id$ */ - #include "config.h" +#include + #include "iroptimize.h" #include "opt_init.h" #include "irnode.h" @@ -44,18 +44,14 @@ #include "beutil.h" #include "irpass.h" #include "irdom.h" +#include "opt_manage.h" +#include #include "irbackedge_t.h" -#include "irphase_t.h" +#include "irnodemap.h" #include "irloop_t.h" - -DEBUG_ONLY(static firm_dbg_module_t *dbg); - -/* DBG print stats for every procedure. */ -#define LOOP_OPT_STATS -/* DBG: Ignore node limits and process every possible loop. */ -#define LOOP_IGNORE_NODE_LIMITS 1 +DEBUG_ONLY(static firm_dbg_module_t *dbg;) /** * Convenience macro for iterating over every phi node of the given block. @@ -78,7 +74,7 @@ typedef enum { } unrolling_kind_flag; /* Condition for performing visiting a node during copy_walk. */ -typedef unsigned walker_condition(ir_node *); +typedef bool walker_condition(const ir_node *); /* Node and position of a predecessor. */ typedef struct entry_edge { @@ -90,25 +86,18 @@ typedef struct entry_edge { /* Node info for unrolling. */ typedef struct unrolling_node_info { ir_node **copies; - /*ir_node **ins;*/ } unrolling_node_info; /* Outs of the nodes head. */ static entry_edge *cur_head_outs; /* Information about the loop head */ -static ir_node *loop_head = NULL; -static unsigned loop_head_valid = 1; +static ir_node *loop_head = NULL; +static bool loop_head_valid = true; /* List of all inner loops, that are processed. */ static ir_loop **loops; -#ifdef LOOP_OPT_STATS - -#define count_stats(val) (++val) -#define print_stats() (do_print_stats()) -#define reset_stats() (do_reset_stats()) - /* Stats */ typedef struct loop_stats_t { unsigned loops; @@ -128,13 +117,13 @@ typedef struct loop_stats_t { static loop_stats_t stats; /* Set stats to sero */ -static void do_reset_stats(void) +static void reset_stats(void) { memset(&stats, 0, sizeof(loop_stats_t)); } /* Print stats */ -static void do_print_stats(void) +static void print_stats(void) { DB((dbg, LEVEL_2, "---------------------------------------\n")); DB((dbg, LEVEL_2, "loops : %d\n",stats.loops)); @@ -148,26 +137,22 @@ static void do_print_stats(void) DB((dbg, LEVEL_2, "invariant_unroll : %d\n",stats.invariant_unroll)); DB((dbg, LEVEL_2, "=======================================\n")); } -#else -/* No stats */ -#define count_stats(val) ((void)0) -#define print_stats() ((void)0) -#define reset_stats() ((void)0) - -#endif /* Commandline parameters */ typedef struct loop_opt_params_t { - unsigned max_loop_size; /* Maximum number of nodes */ - int depth_adaption; /* Loop nest depth adaption */ - unsigned allowed_calls; /* Number of calls allowed */ - unsigned count_phi:1; /* Count phi nodes */ - unsigned count_proj:1; /* Count projections */ +unsigned max_loop_size; /* Maximum number of nodes [nodes]*/ +int depth_adaption; /* Loop nest depth adaption [percent] */ +unsigned allowed_calls; /* Number of calls allowed [number] */ +bool count_phi; /* Count phi nodes */ +bool count_proj; /* Count projections */ - unsigned max_cc_size; /* Maximum condition chain size */ +unsigned max_cc_size; /* Maximum condition chain size [nodes] */ +unsigned max_branches; - unsigned allow_const_unrolling:1; - unsigned allow_invar_unrolling:1; +unsigned max_unrolled_loop_size; /* [nodes] */ +bool allow_const_unrolling; +bool allow_invar_unrolling; +unsigned invar_unrolling_min_size; /* [nodes] */ } loop_opt_params_t; @@ -175,22 +160,23 @@ static loop_opt_params_t opt_params; /* Loop analysis informations */ typedef struct loop_info_t { - unsigned nodes; /* node count */ - unsigned ld_st; /* load and store nodes */ - unsigned calls; /* number of calls */ - unsigned cf_outs; /* number of cf edges which leave the loop */ - entry_edge cf_out; /* single loop leaving cf edge */ - int be_src_pos; /* position of the single own backedge in the head */ + unsigned nodes; /* node count */ + unsigned ld_st; /* load and store nodes */ + unsigned branches; /* number of conditions */ + unsigned calls; /* number of calls */ + unsigned cf_outs; /* number of cf edges which leave the loop */ + entry_edge cf_out; /* single loop leaving cf edge */ + int be_src_pos; /* position of the single own backedge in the head */ /* for inversion */ - unsigned cc_size; /* nodes in the condition chain */ + unsigned cc_size; /* nodes in the condition chain */ /* for unrolling */ - unsigned max_unroll; /* Number of unrolls satisfying max_loop_size */ - unsigned exit_cond; /* 1 if condition==true exits the loop. */ - unsigned latest_value:1; /* 1 if condition is checked against latest counter value */ - unsigned needs_backedge:1; /* 0 if loop is completely unrolled */ - unsigned decreasing:1; /* Step operation is_Sub, or step is<0 */ + unsigned max_unroll; /* Number of unrolls satisfying max_loop_size */ + unsigned exit_cond; /* 1 if condition==true exits the loop. */ + unsigned latest_value:1; /* 1 if condition is checked against latest counter value */ + unsigned needs_backedge:1; /* 0 if loop is completely unrolled */ + unsigned decreasing:1; /* Step operation is_Sub, or step is<0 */ /* IV informations of a simple loop */ ir_node *start_val; @@ -199,10 +185,10 @@ typedef struct loop_info_t { ir_node *iteration_phi; ir_node *add; - tarval *count_tar; /* Number of loop iterations */ + ir_tarval *count_tar; /* Number of loop iterations */ - ir_node *duff_cond; /* Duff mod */ - unrolling_kind_flag unroll_kind; /* constant or invariant unrolling */ + ir_node *duff_cond; /* Duff mod */ + unrolling_kind_flag unroll_kind; /* constant or invariant unrolling */ } loop_info_t; /* Information about the current loop */ @@ -224,7 +210,8 @@ static entry_edge *loop_entries; /* Number of unrolls to perform */ static int unroll_nr; /* Phase is used to keep copies of nodes. */ -static ir_phase *phase; +static ir_nodemap map; +static struct obstack obst; /* Loop operations. */ typedef enum loop_op_t { @@ -236,19 +223,13 @@ typedef enum loop_op_t { /* Saves which loop operation to do until after basic tests. */ static loop_op_t loop_op; -/************************************************************************/ - /* Returns the maximum nodes for the given nest depth */ static unsigned get_max_nodes_adapted(unsigned depth) { - int adapt_permil = opt_params.depth_adaption * depth; - unsigned permil_change; + double perc = 100.0 + (double)opt_params.depth_adaption; + double factor = pow(perc / 100.0, depth); - if (adapt_permil < -1000) - return 0; - - permil_change = 1000 + adapt_permil; - return (opt_params.max_loop_size * permil_change) / 1000; + return (int)((double)opt_params.max_loop_size * factor); } /* Reset nodes link. For use with a walker. */ @@ -259,60 +240,82 @@ static void reset_link(ir_node *node, void *env) } /* Returns 0 if the node or block is not in cur_loop. */ -static unsigned is_in_loop(ir_node *node) +static bool is_in_loop(const ir_node *node) { - return (get_irn_loop(get_block(node)) == cur_loop); + return get_irn_loop(get_block_const(node)) == cur_loop; } /* Returns 0 if the given edge is not a backedge * with its pred in the cur_loop. */ -static unsigned is_own_backedge(ir_node *n, int pos) +static bool is_own_backedge(const ir_node *n, int pos) { - return (is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos))); + return is_backedge(n, pos) && is_in_loop(get_irn_n(n, pos)); } /* Finds loop head and some loop_info as calls or else if necessary. */ static void get_loop_info(ir_node *node, void *env) { - unsigned node_in_loop, pred_in_loop; + bool node_in_loop = is_in_loop(node); int i, arity; (void)env; - arity = get_irn_arity(node); - for (i = 0; i < arity; i++) { - ir_node *pred = get_irn_n(node, i); + /* collect some loop information */ + if (node_in_loop) { + if (is_Phi(node) && opt_params.count_phi) + ++loop_info.nodes; + else if (is_Proj(node) && opt_params.count_proj) + ++loop_info.nodes; + else if (!is_Confirm(node) && !is_Const(node) && !is_SymConst(node)) + ++loop_info.nodes; - pred_in_loop = is_in_loop(pred); - node_in_loop = is_in_loop(node); + if (is_Load(node) || is_Store(node)) + ++loop_info.ld_st; - /* collect some loop information */ - if (node_in_loop) { - if (is_Phi(node) && opt_params.count_phi) - ++loop_info.nodes; - else if (is_Proj(node) && opt_params.count_proj) - ++loop_info.nodes; - else if (!is_Confirm(node) && !is_Const(node) && !is_SymConst(node)) - ++loop_info.nodes; + if (is_Call(node)) + ++loop_info.calls; + } - if (is_Load(node) || is_Store(node)) - ++loop_info.ld_st; + arity = get_irn_arity(node); + for (i = 0; i < arity; i++) { + ir_node *pred = get_irn_n(node, i); + bool pred_in_loop = is_in_loop(pred); - if (is_Call(node)) - ++loop_info.calls; + if (is_Block(node) && !node_in_loop && pred_in_loop) { + entry_edge entry; + entry.node = node; + entry.pos = i; + entry.pred = pred; + /* Count cf outs */ + ++loop_info.cf_outs; + loop_info.cf_out = entry; } /* Find the loops head/the blocks with cfpred outside of the loop */ - if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) { - ir_node *cfgpred = get_Block_cfgpred(node, i); - - if (!is_in_loop(cfgpred)) { - DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", - node, pred)); - /* another head? We do not touch this. */ - if (loop_head && loop_head != node) { - loop_head_valid = 0; - } else { - loop_head = node; + if (is_Block(node)) { + const ir_edge_t *edge; + unsigned outs_n = 0; + + /* Count innerloop branches */ + foreach_out_edge_kind(node, edge, EDGE_KIND_BLOCK) { + ir_node *succ = get_edge_src_irn(edge); + if (is_Block(succ) && is_in_loop(succ)) + ++outs_n; + } + if (outs_n > 1) + ++loop_info.branches; + + if (node_in_loop && !pred_in_loop && loop_head_valid) { + ir_node *cfgpred = get_Block_cfgpred(node, i); + + if (!is_in_loop(cfgpred)) { + DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", + node, pred)); + /* another head? We do not touch this. */ + if (loop_head && loop_head != node) { + loop_head_valid = false; + } else { + loop_head = node; + } } } } @@ -340,11 +343,6 @@ static void get_loop_entries(ir_node *node, void *env) entry.pos = i; entry.pred = pred; ARR_APP1(entry_edge, loop_entries, entry); - /* Count cf outs */ - if (is_Block(node)) { - ++loop_info.cf_outs; - loop_info.cf_out = entry; - } } } } @@ -360,7 +358,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int fi { int i; int n_cfgpreds; - ir_graph *irg; + ir_graph *irg = get_irn_irg(block); ir_node *phi; ir_node **in; @@ -370,7 +368,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int fi * Dead and bad blocks. */ if (get_irn_arity(block) < 1 || is_Bad(block)) { DB((dbg, LEVEL_5, "ssa bad %N\n", block)); - return new_Bad(); + return new_r_Bad(irg, mode); } if (block == ssa_second_def_block && !first) { @@ -385,7 +383,6 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int fi return value; } - irg = get_irn_irg(block); assert(block != get_irg_start_block(irg)); /* a Block with only 1 predecessor needs no Phi */ @@ -406,7 +403,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int fi /* create a new Phi */ NEW_ARR_A(ir_node*, in, n_cfgpreds); for (i = 0; i < n_cfgpreds; ++i) - in[i] = new_Unknown(mode); + in[i] = new_r_Dummy(irg, mode); phi = new_r_Phi(block, n_cfgpreds, in, mode); /* Important: always keep block phi list up to date. */ @@ -495,18 +492,17 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, /* Assign the copy with index nr to node n */ static void set_unroll_copy(ir_node *n, int nr, ir_node *cp) { + unrolling_node_info *info; assert(nr != 0 && "0 reserved"); - unrolling_node_info *info = (unrolling_node_info *)phase_get_irn_data(phase, n); + info = (unrolling_node_info*)ir_nodemap_get(&map, n); if (! info) { - ir_node **arr; + ir_node **arr = NEW_ARR_D(ir_node*, &obst, unroll_nr); + memset(arr, 0, unroll_nr * sizeof(ir_node*)); - info = XMALLOCZ(unrolling_node_info); - arr = NEW_ARR_F(ir_node *, unroll_nr); + info = OALLOCZ(&obst, unrolling_node_info); info->copies = arr; - memset(info->copies, 0, (unroll_nr) * sizeof(ir_node *)); - - phase_set_irn_data(phase, n, info); + ir_nodemap_insert(&map, n, info); } /* Original node */ info->copies[0] = n; @@ -517,11 +513,12 @@ static void set_unroll_copy(ir_node *n, int nr, ir_node *cp) /* Returns a nodes copy if it exists, else NULL. */ static ir_node *get_unroll_copy(ir_node *n, int nr) { - unrolling_node_info *info = (unrolling_node_info *)phase_get_irn_data(phase, n); + ir_node *cp; + unrolling_node_info *info = (unrolling_node_info *)ir_nodemap_get(&map, n); if (! info) return NULL; - ir_node *cp = info->copies[nr]; + cp = info->copies[nr]; return cp; } @@ -531,13 +528,13 @@ static ir_node *get_unroll_copy(ir_node *n, int nr) /* Sets copy cp of node n. */ static void set_inversion_copy(ir_node *n, ir_node *cp) { - phase_set_irn_data(phase, n, cp); + ir_nodemap_insert(&map, n, cp); } /* Getter of copy of n for inversion */ static ir_node *get_inversion_copy(ir_node *n) { - ir_node *cp = (ir_node *)phase_get_irn_data(phase, n); + ir_node *cp = (ir_node *)ir_nodemap_get(&map, n); return cp; } @@ -552,32 +549,47 @@ static void reset_block_mark(ir_node *node, void * env) /* Returns mark of node, or its block if node is not a block. * Used in this context to determine if node is in the condition chain. */ -static unsigned is_nodes_block_marked(ir_node* node) +static bool is_nodes_block_marked(const ir_node* node) { - if (is_Block(node)) - return get_Block_mark(node); - else - return get_Block_mark(get_block(node)); + return get_Block_mark(get_block_const(node)); } /* Extends a nodes ins by node new. * NOTE: This is slow if a node n needs to be extended more than once. */ -static int extend_irn(ir_node *n, ir_node *new) +static void extend_irn(ir_node *n, ir_node *newnode, bool new_is_backedge) { - ir_node **ins; int i; int arity = get_irn_arity(n); int new_arity = arity + 1; - - NEW_ARR_A(ir_node *, ins, new_arity); + ir_node **ins = XMALLOCN(ir_node*, new_arity); + bool *bes = XMALLOCN(bool, new_arity); + + /* save bes */ + /* Bes are important! + * Another way would be recreating the looptree, + * but after that we cannot distinguish already processed loops + * from not yet processed ones. */ + if (is_Block(n)) { + for(i = 0; i < arity; ++i) { + bes[i] = is_backedge(n, i); + } + bes[i] = new_is_backedge; + } for(i = 0; i < arity; ++i) { ins[i] = get_irn_n(n, i); } - ins[i] = new; + ins[i] = newnode; set_irn_in(n, new_arity, ins); - return arity; + + /* restore bes */ + if (is_Block(n)) { + for(i = 0; i < new_arity; ++i) { + if (bes[i]) + set_backedge(n, i); + } + } } /* Extends a block by a copy of its pred at pos, @@ -593,7 +605,7 @@ static void extend_ins_by_copy(ir_node *block, int pos) pred = get_irn_n(block, pos); new_in = get_inversion_copy(pred); DB((dbg, LEVEL_5, "Extend block %N by %N cp of %N\n", block, new_in, pred)); - extend_irn(block, new_in); + extend_irn(block, new_in, false); /* Extend block phis by copy of definition at pos */ for_each_phi(block, phi) { @@ -609,18 +621,18 @@ static void extend_ins_by_copy(ir_node *block, int pos) new_in = cp; DB((dbg, LEVEL_5, "Extend phi %N by %N cp of %N\n", phi, new_in, pred)); - extend_irn(phi, new_in); + extend_irn(phi, new_in, false); } } /* Returns the number of blocks backedges. With or without alien bes. */ -static int get_backedge_n(ir_node *block, unsigned with_alien) +static int get_backedge_n(ir_node *block, bool with_alien) { int i; int be_n = 0; int arity = get_irn_arity(block); - assert(is_Block(block) && "We only required backedges of blocks."); + assert(is_Block(block)); for (i = 0; i < arity; ++i) { ir_node *pred = get_irn_n(block, i); @@ -647,8 +659,6 @@ static ir_node *copy_node(ir_node *node) } if (is_Block(cp)) { - /* We may not keep the old macroblock. */ - set_Block_MacroBlock(cp, cp); set_Block_mark(cp, 0); } @@ -663,7 +673,7 @@ static ir_node *copy_node(ir_node *node) * Order of ins is important for later usage. */ static void copy_walk(ir_node *node, walker_condition *walk_condition, - ir_loop *set_loop) + ir_loop *set_loop) { int i; int arity; @@ -745,8 +755,8 @@ static void copy_walk(ir_node *node, walker_condition *walk_condition, * Order of ins is important for later usage. * Takes copy_index, to phase-link copy at specific index. */ -static void copy_walk_n(ir_node *node, - walker_condition *walk_condition, int copy_index) +static void copy_walk_n(ir_node *node, walker_condition *walk_condition, + int copy_index) { int i; int arity; @@ -820,8 +830,8 @@ static void copy_walk_n(ir_node *node, /* Removes alle Blocks with non marked predecessors from the condition chain. */ static void unmark_not_allowed_cc_blocks(void) { - int blocks = ARR_LEN(cc_blocks); - int i; + size_t blocks = ARR_LEN(cc_blocks); + size_t i; for(i = 0; i < blocks; ++i) { ir_node *block = cc_blocks[i]; @@ -845,20 +855,22 @@ static void unmark_not_allowed_cc_blocks(void) } } -/* Unmarks all cc blocks using cc_blocks except head. */ +/* Unmarks all cc blocks using cc_blocks except head. + * TODO: invert head for unrolling? */ static void unmark_cc_blocks(void) { - int blocks = ARR_LEN(cc_blocks); - int i; + size_t blocks = ARR_LEN(cc_blocks); + size_t i; for(i = 0; i < blocks; ++i) { ir_node *block = cc_blocks[i]; - /* Head is an exception. */ - if (block != loop_head) - set_Block_mark(block, 0); + /* TODO Head is an exception. */ + /*if (block != loop_head)*/ + set_Block_mark(block, 0); } - inversion_blocks_in_cc = 1; + /*inversion_blocks_in_cc = 1;*/ + inversion_blocks_in_cc = 0; /* invalidate */ loop_info.cc_size = 0; @@ -910,18 +922,18 @@ static void get_head_outs(ir_node *node, void *env) } /** - * Find condition chains, and add them to be inverted + * Find condition chains, and add them to be inverted. * A block belongs to the chain if a condition branches out of the loop. * (Some blocks need to be removed once again.) * Returns 1 if the given block belongs to the condition chain. */ -static unsigned find_condition_chain(ir_node *block) +static void find_condition_chain(ir_node *block) { const ir_edge_t *edge; - unsigned mark = 0; - unsigned has_be = 0; - unsigned jmp_only; - unsigned nodes_n; + bool mark = false; + bool has_be = false; + bool jmp_only = true; + unsigned nodes_n = 0; mark_irn_visited(block); @@ -938,16 +950,15 @@ static unsigned find_condition_chain(ir_node *block) * continuing with another subtree. */ if (loop_info.cc_size + nodes_n > opt_params.max_cc_size) { set_Block_mark(block, 0); - return 0; + return; } /* Check if block only has a jmp instruction. */ - jmp_only = 1; foreach_out_edge(block, edge) { ir_node *src = get_edge_src_irn(edge); - if (! is_Block(src) && ! is_Jmp(src)) { - jmp_only = 0; + if (!is_Block(src) && !is_Jmp(src)) { + jmp_only = false; } } @@ -957,8 +968,8 @@ static unsigned find_condition_chain(ir_node *block) ir_node *src = get_edge_src_irn(edge); int pos = get_edge_src_pos(edge); - if (! is_in_loop(src)) - mark = 1; + if (!is_in_loop(src)) + mark = true; /* Inverting blocks with backedge outs leads to a cf edge * from the inverted head, into the inverted head (skipping the body). @@ -966,7 +977,7 @@ static unsigned find_condition_chain(ir_node *block) * this would introduce another loop in the existing loop. * This loop inversion cannot cope with this case. */ if (is_backedge(src, pos)) { - has_be = 1; + has_be = true; break; } } @@ -979,13 +990,13 @@ static unsigned find_condition_chain(ir_node *block) * / A* B / | * / /\ / ? | * / C* => D | - * / D Head | + * / D Head | * / A \_| * C */ /* Collect blocks containing only a Jmp. * Do not collect blocks with backedge outs. */ - if ((jmp_only == 1 || mark == 1) && has_be == 0) { + if ((jmp_only || mark) && !has_be) { set_Block_mark(block, 1); ++inversion_blocks_in_cc; loop_info.cc_size += nodes_n; @@ -1001,12 +1012,10 @@ static unsigned find_condition_chain(ir_node *block) if (is_in_loop(src) && ! irn_visited(src)) find_condition_chain(src); } - - return mark; } /** - * Rewires the copied condition chain. Removes backedges. + * Rewires the copied condition chain. Removes backedges * as this condition chain is prior to the loop. * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head. * (loop_head is already fixed, we cannot rely on it.) @@ -1017,10 +1026,11 @@ static void fix_copy_inversion(void) ir_node **ins; ir_node **phis; ir_node *phi, *next; - ir_node *head_cp = get_inversion_copy(loop_head); - int arity = get_irn_arity(head_cp); - int backedges = get_backedge_n(head_cp, 0); - int new_arity = arity - backedges; + ir_node *head_cp = get_inversion_copy(loop_head); + ir_graph *irg = get_irn_irg(head_cp); + int arity = get_irn_arity(head_cp); + int backedges = get_backedge_n(head_cp, false); + int new_arity = arity - backedges; int pos; int i; @@ -1033,7 +1043,7 @@ static void fix_copy_inversion(void) ins[pos++] = get_irn_n(head_cp, i); } - new_head = new_Block(new_arity, ins); + new_head = new_r_Block(irg, new_arity, ins); phis = NEW_ARR_F(ir_node *, 0); @@ -1061,6 +1071,7 @@ static void fix_copy_inversion(void) DEL_ARR_F(phis); } + /* Puts the original condition chain at the end of the loop, * subsequently to the body. * Relies on block phi list and correct backedges. @@ -1071,9 +1082,10 @@ static void fix_head_inversion(void) ir_node **ins; ir_node *phi, *next; ir_node **phis; - int arity = get_irn_arity(loop_head); - int backedges = get_backedge_n(loop_head, 0); - int new_arity = backedges; + ir_graph *irg = get_irn_irg(loop_head); + int arity = get_irn_arity(loop_head); + int backedges = get_backedge_n(loop_head, false); + int new_arity = backedges; int pos; int i; @@ -1086,7 +1098,7 @@ static void fix_head_inversion(void) ins[pos++] = get_irn_n(loop_head, i); } - new_head = new_Block(new_arity, ins); + new_head = new_r_Block(irg, new_arity, ins); phis = NEW_ARR_F(ir_node *, 0); @@ -1104,12 +1116,11 @@ static void fix_head_inversion(void) /* If assignment is in the condition chain, * we need to create a phi in the new loop head. * This can only happen for df, not cf. See find_condition_chains. */ - if (is_nodes_block_marked(pred)) { - /* Cannot do this here. */ - ins[pos++] = pred; /*fix_inner_cc_definitions(phi, pred);*/ - } else { + /*if (is_nodes_block_marked(pred)) { ins[pos++] = pred; - } + } else {*/ + ins[pos++] = pred; + } } @@ -1119,7 +1130,7 @@ static void fix_head_inversion(void) ARR_APP1(ir_node *, phis, new_phi); - DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N (arity %d)\n", phi, new_phi, pos )); + DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N (pos %d)\n", phi, new_phi, pos )); } pos = 0; @@ -1136,19 +1147,19 @@ static void fix_head_inversion(void) } /* Does the loop inversion. */ -static void inversion_walk(entry_edge *head_entries) +static void inversion_walk(ir_graph *irg, entry_edge *head_entries) { - int i; + size_t i; /* * The order of rewiring bottom-up is crucial. * Any change of the order leads to lost information that would be needed later. */ - ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); + ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED); /* 1. clone condition chain */ - inc_irg_visited(current_ir_graph); + inc_irg_visited(irg); for (i = 0; i < ARR_LEN(head_entries); ++i) { entry_edge entry = head_entries[i]; @@ -1159,7 +1170,7 @@ static void inversion_walk(entry_edge *head_entries) copy_walk(pred, is_nodes_block_marked, cur_loop); } - ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); + ir_free_resources(irg, IR_RESOURCE_IRN_VISITED); /* 2. Extends the head control flow successors ins * with the definitions of the copied head node. */ @@ -1232,20 +1243,61 @@ static void inversion_walk(entry_edge *head_entries) /* 5. Remove the backedges of the copied condition chain, * because it is going to be the new 'head' in advance to the loop. */ fix_copy_inversion(); + } /* Performs loop inversion of cur_loop if possible and reasonable. */ -static void loop_inversion(void) +static void loop_inversion(ir_graph *irg) { - unsigned do_inversion = 1; - unsigned has_cc = 0; + int loop_depth; + unsigned max_loop_nodes = opt_params.max_loop_size; + unsigned max_loop_nodes_adapted; + int depth_adaption = opt_params.depth_adaption; + + bool do_inversion = true; + + /* Depth of 0 is the procedure and 1 a topmost loop. */ + loop_depth = get_loop_depth(cur_loop) - 1; + + /* Calculating in per mil. */ + max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth); + + DB((dbg, LEVEL_1, "max_nodes: %d\nmax_nodes_adapted %d at depth of %d (adaption %d)\n", + max_loop_nodes, max_loop_nodes_adapted, loop_depth, depth_adaption)); + + if (loop_info.nodes == 0) + return; + + if (loop_info.nodes > max_loop_nodes) { + /* Only for stats */ + DB((dbg, LEVEL_1, "Nodes %d > allowed nodes %d\n", + loop_info.nodes, loop_depth, max_loop_nodes)); + ++stats.too_large; + /* no RETURN */ + /* Adaption might change it */ + } + + /* Limit processing to loops smaller than given parameter. */ + if (loop_info.nodes > max_loop_nodes_adapted) { + DB((dbg, LEVEL_1, "Nodes %d > allowed nodes (depth %d adapted) %d\n", + loop_info.nodes, loop_depth, max_loop_nodes_adapted)); + ++stats.too_large_adapted; + return; + } + + if (loop_info.calls > opt_params.allowed_calls) { + DB((dbg, LEVEL_1, "Calls %d > allowed calls %d\n", + loop_info.calls, opt_params.allowed_calls)); + ++stats.calls_limit; + return; + } /*inversion_head_node_limit = INT_MAX;*/ - ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); + ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK); /* Reset block marks. * We use block marks to flag blocks of the original condition chain. */ - irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL); + irg_walk_graph(irg, reset_block_mark, NULL, NULL); /*loop_info.blocks = get_loop_n_blocks(cur_loop);*/ cond_chain_entries = NEW_ARR_F(entry_edge, 0); @@ -1255,40 +1307,35 @@ static void loop_inversion(void) inversion_blocks_in_cc = 0; /* Use phase to keep copy of nodes from the condition chain. */ - phase = new_phase(current_ir_graph, phase_irn_init_default); + ir_nodemap_init(&map, irg); + obstack_init(&obst); /* Search for condition chains and temporarily save the blocks in an array. */ cc_blocks = NEW_ARR_F(ir_node *, 0); - inc_irg_visited(current_ir_graph); - has_cc = find_condition_chain(loop_head); + inc_irg_visited(irg); + find_condition_chain(loop_head); unmark_not_allowed_cc_blocks(); DEL_ARR_F(cc_blocks); -#if IGNORE_NODE_LIMITS - (void) unmark_cc_blocks; -#else /* Condition chain too large. * Loop should better be small enough to fit into the cache. */ - /* FIXME Of course, we should take a small enough cc in the first place, + /* TODO Of course, we should take a small enough cc in the first place, * which is not that simple. (bin packing) */ if (loop_info.cc_size > opt_params.max_cc_size) { - count_stats(stats.cc_limit_reached); + ++stats.cc_limit_reached; - /* Only head taken? */ - if (inversion_blocks_in_cc == 1) - do_inversion = 0; - else - /* Unmark cc blocks except the head. - * Invert head only for possible unrolling. */ - unmark_cc_blocks(); + do_inversion = false; + + /* Unmark cc blocks except the head. + * Invert head only for possible unrolling. */ + unmark_cc_blocks(); } -#endif /* We also catch endless loops here, * because they do not have a condition chain. */ if (inversion_blocks_in_cc < 1) { - do_inversion = 0; + do_inversion = false; DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", inversion_blocks_in_cc)); @@ -1298,29 +1345,27 @@ static void loop_inversion(void) cur_head_outs = NEW_ARR_F(entry_edge, 0); /* Get all edges pointing into the condition chain. */ - irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL); + irg_walk_graph(irg, get_head_outs, NULL, NULL); /* Do the inversion */ - inversion_walk(cur_head_outs); + inversion_walk(irg, cur_head_outs); DEL_ARR_F(cur_head_outs); /* Duplicated blocks changed doms */ - set_irg_doms_inconsistent(current_ir_graph); - /* Loop content changed */ - set_irg_loopinfo_inconsistent(current_ir_graph); - /* TODO are they? Depends on set_irn_in and set_irn_n exchange and new_node. */ - set_irg_outs_inconsistent(current_ir_graph); + clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE + | IR_GRAPH_STATE_CONSISTENT_LOOPINFO); - count_stats(stats.inverted); + ++stats.inverted; } /* free */ - phase_free(phase); + obstack_free(&obst, NULL); + ir_nodemap_destroy(&map); DEL_ARR_F(cond_chain_entries); DEL_ARR_F(head_df_loop); - ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); + ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK); } /* Fix the original loop_heads ins for invariant unrolling case. */ @@ -1337,17 +1382,19 @@ static void unrolling_fix_loop_head_inv(void) ins[0] = loop_condition; ins[1] = proj; - set_irn_in(loop_head, 2, ins); + DB((dbg, LEVEL_4, "Rewire ins of block loophead %N to pred %N and duffs entry %N \n" , loop_head, ins[0], ins[1])); for_each_phi(loop_head, phi) { ir_node *pred = get_irn_n(phi, loop_info.be_src_pos); + /* TODO we think it is a phi, but for Mergesort it is not the case.*/ + ir_node *last_pred = get_unroll_copy(pred, unroll_nr - 1); ins[0] = last_pred; - ins[1] = get_irn_link(phi); - + ins[1] = (ir_node*)get_irn_link(phi); set_irn_in(phi, 2, ins); + DB((dbg, LEVEL_4, "Rewire ins of loophead phi %N to pred %N and duffs entry %N \n" , phi, ins[0], ins[1])); } } @@ -1355,6 +1402,7 @@ static void unrolling_fix_loop_head_inv(void) static void correct_phis(ir_node *node, void *env) { (void)env; + if (is_Phi(node) && get_irn_arity(node) == 1) { ir_node *exch; ir_node *in[1]; @@ -1363,7 +1411,7 @@ static void correct_phis(ir_node *node, void *env) exch = new_rd_Phi(get_irn_dbg_info(node), get_nodes_block(node), 1, in, - get_irn_mode(node)); + get_irn_mode(node)); exchange(node, exch); } @@ -1373,7 +1421,8 @@ static void correct_phis(ir_node *node, void *env) static void place_copies(int copies) { ir_node *loophead = loop_head; - int c, i; + size_t i; + int c; int be_src_pos = loop_info.be_src_pos; /* Serialize loops by fixing their head ins. @@ -1415,14 +1464,16 @@ static void place_copies(int copies) } else { /* Invariant case */ /* Every node has 2 ins. One from the duff blocks - * and one from the previous unrolled loop. */ + * and one from the previously unrolled loop. */ ir_node *ins[2]; /* Calculate corresponding projection of mod result for this copy c */ ir_node *proj = new_Proj(loop_info.duff_cond, mode_X, unroll_nr - c - 1); + DB((dbg, LEVEL_4, "New duff proj %N\n" , proj)); ins[0] = new_jmp; ins[1] = proj; - set_irn_in(lower, 1, ins); + set_irn_in(lower, 2, ins); + DB((dbg, LEVEL_4, "Rewire ins of Block %N to pred %N and duffs entry %N \n" , lower, ins[0], ins[1])); for_each_phi(loophead, phi) { ir_node *topmost_phi_pred = get_irn_n(phi, be_src_pos); @@ -1431,8 +1482,10 @@ static void place_copies(int copies) ir_node *duff_phi; lower_phi = get_unroll_copy(phi, c + 1); - duff_phi = get_irn_link(lower_phi); + duff_phi = (ir_node*)get_irn_link(phi); + DB((dbg, LEVEL_4, "DD Link of %N is %N\n" , phi, duff_phi)); + /* */ if (is_in_loop(topmost_phi_pred)) { upper_phi_pred = get_unroll_copy(topmost_phi_pred, c); } else { @@ -1441,13 +1494,13 @@ static void place_copies(int copies) ins[0] = upper_phi_pred; ins[1] = duff_phi; - set_irn_in(lower_phi, 2, ins); + DB((dbg, LEVEL_4, "Rewire ins of %N to pred %N and duffs entry %N \n" , lower_phi, ins[0], ins[1])); } } } - /* Reconnect loop landing pad with last copy. */ + /* Reconnect last copy. */ for (i = 0; i < ARR_LEN(loop_entries); ++i) { entry_edge edge = loop_entries[i]; /* Last copy is at the bottom */ @@ -1469,13 +1522,14 @@ static void place_copies(int copies) ir_node *last_pred; /* It is possible, that the value used - * in the OWN backedge path is NOT defined in this loop. */ + * in the OWN backedge path is NOT assigned in this loop. */ if (is_in_loop(pred)) last_pred = get_unroll_copy(pred, copies); else last_pred = pred; set_irn_n(phi, be_src_pos, last_pred); } + } else { unrolling_fix_loop_head_inv(); } @@ -1484,11 +1538,12 @@ static void place_copies(int copies) /* Copies the cur_loop several times. */ static void copy_loop(entry_edge *cur_loop_outs, int copies) { - int i, c; + int c; ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); for (c = 0; c < copies; ++c) { + size_t i; inc_irg_visited(current_ir_graph); @@ -1507,45 +1562,44 @@ static void copy_loop(entry_edge *cur_loop_outs, int copies) /* Creates a new phi from the given phi node omitting own bes, * using be_block as supplier of backedge informations. */ -static ir_node *clone_phis_sans_bes(ir_node *node, ir_node *be_block) +static ir_node *clone_phis_sans_bes(ir_node *phi, ir_node *be_block, ir_node *dest_block) { ir_node **ins; - int arity = get_irn_arity(node); + int arity = get_irn_arity(phi); int i, c = 0; + ir_node *newphi; - assert(get_irn_arity(node) == get_irn_arity(be_block)); - assert(is_Phi(node)); + assert(get_irn_arity(phi) == get_irn_arity(be_block)); + assert(is_Phi(phi)); ins = NEW_ARR_F(ir_node *, arity); for (i = 0; i < arity; ++i) { if (! is_own_backedge(be_block, i)) { - ins[c] = get_irn_n(node, i); + ins[c] = get_irn_n(phi, i); ++c; } - /* } else { - ir_node *pred = get_inr_n(node, i); - if (! is_in_loop(pred)) { - ins[c] = pred; - ++c; - } - }*/ } - return new_r_Phi(get_nodes_block(node), c, ins, get_irn_mode(node)); + newphi = new_r_Phi(dest_block, c, ins, get_irn_mode(phi)); + + set_irn_link(phi, newphi); + DB((dbg, LEVEL_4, "Linking for duffs device %N to %N\n", phi, newphi)); + + return newphi; } /* Creates a new block from the given block node omitting own bes, * using be_block as supplier of backedge informations. */ static ir_node *clone_block_sans_bes(ir_node *node, ir_node *be_block) { - ir_node **ins; int arity = get_irn_arity(node); int i, c = 0; + ir_node **ins; assert(get_irn_arity(node) == get_irn_arity(be_block)); assert(is_Block(node)); - ins = NEW_ARR_F(ir_node *, arity); + NEW_ARR_A(ir_node *, ins, arity); for (i = 0; i < arity; ++i) { if (! is_own_backedge(be_block, i)) { ins[c] = get_irn_n(node, i); @@ -1556,6 +1610,21 @@ static ir_node *clone_block_sans_bes(ir_node *node, ir_node *be_block) return new_Block(c, ins); } +/* Creates a structure to calculate absolute value of node op. + * Returns mux node with absolute value. */ +static ir_node *new_Abs(ir_node *op, ir_mode *mode) +{ + ir_graph *irg = get_irn_irg(op); + ir_node *block = get_nodes_block(op); + ir_node *zero = new_r_Const(irg, get_mode_null(mode)); + ir_node *cmp = new_r_Cmp(block, op, zero, ir_relation_less); + ir_node *minus_op = new_r_Minus(block, op, mode); + ir_node *mux = new_r_Mux(block, cmp, op, minus_op, mode); + + return mux; +} + + /* Creates blocks for duffs device, using previously obtained * informations about the iv. * TODO split */ @@ -1564,8 +1633,8 @@ static void create_duffs_block(void) ir_mode *mode; ir_node *block1, *count_block, *duff_block; - ir_node *ems, *ems_divmod, *ems_mod_proj, *cmp_null, - *cmp_proj, *ems_mode_cond, *x_true, *x_false, *const_null; + ir_node *ems, *ems_mod, *ems_div, *ems_mod_proj, *cmp_null, + *ems_mode_cond, *x_true, *x_false, *const_null; ir_node *true_val, *false_val; ir_node *ins[2]; @@ -1573,6 +1642,7 @@ static void create_duffs_block(void) ir_node *count, *correction, *unroll_c; ir_node *cmp_bad_count, *good_count, *bad_count, *count_phi, *bad_count_neg; + ir_node *phi; mode = get_irn_mode(loop_info.end_val); const_null = new_Const(get_mode_null(mode)); @@ -1581,37 +1651,50 @@ static void create_duffs_block(void) * 1. Calculate first approach to count. * Condition: (end - start) % step == 0 */ block1 = clone_block_sans_bes(loop_head, loop_head); + DB((dbg, LEVEL_4, "Duff block 1 %N\n", block1)); /* Create loop entry phis in first duff block * as it becomes the loops preheader */ - if (loop_info.unroll_kind == invariant) { - ir_node *phi; - for_each_phi(loop_head, phi) { - ir_node *new_phi = clone_phis_sans_bes(phi, loop_head); - set_nodes_block(new_phi, block1); - } + for_each_phi(loop_head, phi) { + /* Returns phis pred if phi would have arity 1*/ + ir_node *new_phi = clone_phis_sans_bes(phi, loop_head, block1); + + DB((dbg, LEVEL_4, "HEAD %N phi %N\n", loop_head, phi)); + DB((dbg, LEVEL_4, "BLOCK1 %N phi %N\n", block1, new_phi)); } ems = new_r_Sub(block1, loop_info.end_val, loop_info.start_val, get_irn_mode(loop_info.end_val)); + DB((dbg, LEVEL_4, "BLOCK1 sub %N\n", ems)); + + + ems = new_Sub(loop_info.end_val, loop_info.start_val, + get_irn_mode(loop_info.end_val)); - ems_divmod = new_r_DivMod(block1, + DB((dbg, LEVEL_4, "mod ins %N %N\n", ems, loop_info.step)); + ems_mod = new_r_Mod(block1, new_NoMem(), ems, loop_info.step, mode, op_pin_state_pinned); + ems_div = new_r_Div(block1, + new_NoMem(), + ems, + loop_info.step, + mode, + op_pin_state_pinned); + + DB((dbg, LEVEL_4, "New module node %N\n", ems_mod)); - ems_mod_proj = new_r_Proj(ems_divmod, mode, pn_DivMod_res_mod); - cmp_null = new_r_Cmp(block1, ems_mod_proj, const_null); - cmp_proj = new_r_Proj(cmp_null, mode, pn_Cmp_Eq); - ems_mode_cond = new_Cond(cmp_proj); + ems_mod_proj = new_r_Proj(ems_mod, mode_Iu, pn_Mod_res); + cmp_null = new_r_Cmp(block1, ems_mod_proj, const_null, ir_relation_less); + ems_mode_cond = new_r_Cond(block1, cmp_null); /* ems % step == 0 */ - x_true = new_Proj(ems_mode_cond, mode_X, pn_Cond_true); + x_true = new_r_Proj(ems_mode_cond, mode_X, pn_Cond_true); /* ems % step != 0 */ - x_false = new_Proj(ems_mode_cond, mode_X, pn_Cond_false); - + x_false = new_r_Proj(ems_mode_cond, mode_X, pn_Cond_false); /* 2. Second block. * Assures, duffs device receives a valid count. @@ -1623,6 +1706,8 @@ static void create_duffs_block(void) ins[1] = x_false; count_block = new_Block(2, ins); + DB((dbg, LEVEL_4, "Duff block 2 %N\n", count_block)); + /* Increase loop-taken-count depending on the loop condition * uses the latest iv to compare to. */ @@ -1632,7 +1717,7 @@ static void create_duffs_block(void) /* ems % step != 0 : +1 */ false_val = new_Const(get_mode_one(mode)); } else { - tarval *tv_two = new_tarval_from_long(2, mode); + ir_tarval *tv_two = new_tarval_from_long(2, mode); /* ems % step == 0 : +1 */ true_val = new_Const(get_mode_one(mode)); /* ems % step != 0 : +2 */ @@ -1644,13 +1729,11 @@ static void create_duffs_block(void) correction = new_r_Phi(count_block, 2, ins, mode); - count = new_r_Proj(ems_divmod, mode, pn_DivMod_res_div); + count = new_r_Proj(ems_div, mode, pn_Div_res); /* (end - start) / step + correction */ count = new_Add(count, correction, mode); - cmp_bad_count = new_r_Cmp(count_block, count, const_null); - /* We preconditioned the loop to be tail-controlled. * So, if count is something 'wrong' like 0, * negative/positive (depending on step direction), @@ -1659,12 +1742,14 @@ static void create_duffs_block(void) /* Depending on step direction, we have to check for > or < 0 */ if (loop_info.decreasing == 1) { - bad_count_neg = new_r_Proj(cmp_bad_count, mode_X, pn_Cmp_Lt); + cmp_bad_count = new_r_Cmp(count_block, count, const_null, + ir_relation_less); } else { - bad_count_neg = new_r_Proj(cmp_bad_count, mode_X, pn_Cmp_Gt); + cmp_bad_count = new_r_Cmp(count_block, count, const_null, + ir_relation_greater); } - bad_count_neg = new_Cond(bad_count_neg); + bad_count_neg = new_r_Cond(count_block, cmp_bad_count); good_count = new_Proj(bad_count_neg, mode_X, pn_Cond_true); bad_count = new_Proj(ems_mode_cond, mode_X, pn_Cond_false); @@ -1674,9 +1759,10 @@ static void create_duffs_block(void) ins[0] = good_count; ins[1] = bad_count; duff_block = new_Block(2, ins); + DB((dbg, LEVEL_4, "Duff block 3 %N\n", duff_block)); - /* count wants to be positive */ - ins[0] = get_Abs_op(count); + /* Get absolute value */ + ins[0] = new_Abs(count, mode); /* Manually feed the aforementioned count = 1 (bad case)*/ ins[1] = new_Const(get_mode_one(mode)); count_phi = new_r_Phi(duff_block, 2, ins, mode); @@ -1691,8 +1777,10 @@ static void create_duffs_block(void) mode, op_pin_state_pinned); - proj = new_Proj(duff_mod, mode_X, pn_Mod_res); - cond = new_Cond(proj); + + proj = new_Proj(duff_mod, mode, pn_Mod_res); + /* condition does NOT create itself in the block of the proj! */ + cond = new_r_Cond(duff_block, proj); loop_info.duff_cond = cond; } @@ -1704,8 +1792,11 @@ static unsigned is_loop_invariant_def(ir_node *node) { int i; - if (! is_in_loop(node)) + if (! is_in_loop(node)) { + DB((dbg, LEVEL_4, "Not in loop %N\n", node)); + /* || is_Const(node) || is_SymConst(node)) {*/ return 1; + } /* If this is a phi of the loophead shared by more than 1 loop, * we need to check if all defs are not in the loop. */ @@ -1714,16 +1805,21 @@ static unsigned is_loop_invariant_def(ir_node *node) block = get_nodes_block(node); /* To prevent unexpected situations. */ - if (block != loop_head) + if (block != loop_head) { return 0; + } for (i = 0; i < get_irn_arity(node); ++i) { /* Check if all bes are just loopbacks. */ if (is_own_backedge(block, i) && get_irn_n(node, i) != node) return 0; } + DB((dbg, LEVEL_4, "invar %N\n", node)); + return 1; } - return 1; + DB((dbg, LEVEL_4, "Not invar %N\n", node)); + + return 0; } /* Returns 1 if one pred of node is invariant and the other is not. @@ -1737,20 +1833,29 @@ static unsigned get_invariant_pred(ir_node *node, ir_node **invar_pred, ir_node *other = NULL; if (is_loop_invariant_def(pred0)) { + DB((dbg, LEVEL_4, "pred0 invar %N\n", pred0)); *invar_pred = pred0; *other = pred1; } if (is_loop_invariant_def(pred1)) { - if (invar_pred != NULL) + DB((dbg, LEVEL_4, "pred1 invar %N\n", pred1)); + + if (*invar_pred != NULL) { /* RETURN. We do not want both preds to be invariant. */ return 0; + } *other = pred0; *invar_pred = pred1; return 1; } else { - return 0; + DB((dbg, LEVEL_4, "pred1 not invar %N\n", pred1)); + + if (*invar_pred != NULL) + return 1; + else + return 0; } } @@ -1822,7 +1927,6 @@ static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **ot /*DB((dbg, LEVEL_4, "is %N const\n", pred0));*/ if (is_Const(pred0) || is_SymConst(pred0)) { - DB((dbg, LEVEL_1, "%N is constant\n", pred0)); *const_pred = pred0; *other = pred1; } @@ -1830,12 +1934,10 @@ static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **ot /*DB((dbg, LEVEL_4, "is %N const\n", pred1));*/ if (is_Const(pred1) || is_SymConst(pred1)) { if (*const_pred != NULL) { - DB((dbg, LEVEL_1, "%N is ALSO constant\n", pred1)); /* RETURN. We do not want both preds to be constant. */ return 0; } - DB((dbg, LEVEL_4, "%N is constant\n", pred1)); *other = pred0; *const_pred = pred1; } @@ -1846,47 +1948,28 @@ static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **ot return 1; } -/* Returns the mathematically inverted pn_Cmp. */ -static pn_Cmp get_math_inverted_case(pn_Cmp proj) -{ - switch(proj) { - case pn_Cmp_Eq: - return pn_Cmp_Lg; - case pn_Cmp_Lg: - return pn_Cmp_Eq; - case pn_Cmp_Lt: - return pn_Cmp_Ge; - case pn_Cmp_Le: - return pn_Cmp_Gt; - case pn_Cmp_Gt: - return pn_Cmp_Le; - case pn_Cmp_Ge: - return pn_Cmp_Lt; - default: - panic("Unhandled pn_Cmp."); - } -} - -/* norm_proj means we do not exit the loop. */ -static unsigned simulate_next(tarval **count_tar, - tarval *stepped, tarval *step_tar, tarval *end_tar, pn_Cmp norm_proj) +/* Returns 1 if loop exits within 2 steps of the iv. + * Norm_proj means we do not exit the loop.*/ +static unsigned simulate_next(ir_tarval **count_tar, + ir_tarval *stepped, ir_tarval *step_tar, ir_tarval *end_tar, + ir_relation norm_proj) { - tarval *next; + ir_tarval *next; - DB((dbg, LEVEL_1, "Loop taken if (stepped)%ld %s (end)%ld ", + DB((dbg, LEVEL_4, "Loop taken if (stepped)%ld %s (end)%ld ", get_tarval_long(stepped), - get_pnc_string((norm_proj)), + get_relation_string((norm_proj)), get_tarval_long(end_tar))); - DB((dbg, LEVEL_1, "comparing latest value %d\n", loop_info.latest_value)); + DB((dbg, LEVEL_4, "comparing latest value %d\n", loop_info.latest_value)); /* If current iv does not stay in the loop, * this run satisfied the exit condition. */ if (! (tarval_cmp(stepped, end_tar) & norm_proj)) return 1; - DB((dbg, LEVEL_1, "Result: (stepped)%ld IS %s (end)%ld\n", + DB((dbg, LEVEL_4, "Result: (stepped)%ld IS %s (end)%ld\n", get_tarval_long(stepped), - get_pnc_string(tarval_cmp(stepped, end_tar)), + get_relation_string(tarval_cmp(stepped, end_tar)), get_tarval_long(end_tar))); /* next step */ @@ -1896,11 +1979,11 @@ static unsigned simulate_next(tarval **count_tar, /* sub */ next = tarval_sub(stepped, step_tar, get_irn_mode(loop_info.end_val)); - DB((dbg, LEVEL_1, "Loop taken if %ld %s %ld ", + DB((dbg, LEVEL_4, "Loop taken if %ld %s %ld ", get_tarval_long(next), - get_pnc_string(norm_proj), + get_relation_string(norm_proj), get_tarval_long(end_tar))); - DB((dbg, LEVEL_1, "comparing latest value %d\n", loop_info.latest_value)); + DB((dbg, LEVEL_4, "comparing latest value %d\n", loop_info.latest_value)); /* Increase steps. */ *count_tar = tarval_add(*count_tar, get_tarval_one(get_tarval_mode(*count_tar))); @@ -1923,8 +2006,7 @@ static unsigned simulate_next(tarval **count_tar, static ir_node *is_simple_loop(void) { int arity, i; - unsigned loop_depth, max_loop_nodes_adapted; - ir_node *loop_block, *exit_block, *projx, *cond, *projres, *loop_condition; + ir_node *loop_block, *exit_block, *projx, *cond, *cmp; /* Maximum of one condition, and no endless loops. */ if (loop_info.cf_outs != 1) @@ -1932,25 +2014,15 @@ static ir_node *is_simple_loop(void) DB((dbg, LEVEL_4, "1 loop exit\n")); -#if 0 - /* Ignore loop size. Probably not wise in other than testcases. */ - (void) max_loop_nodes_adapted; - (void) loop_depth; - - loop_info.max_unroll = 6; -#else /* Calculate maximum unroll_nr keeping node count below limit. */ - loop_depth = get_loop_depth(cur_loop) - 1; - max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth); - - loop_info.max_unroll = opt_params.max_loop_size / loop_info.nodes; + loop_info.max_unroll = (int)((double)opt_params.max_unrolled_loop_size / (double)loop_info.nodes); if (loop_info.max_unroll < 2) { - count_stats(stats.too_large); + ++stats.too_large; return NULL; } -#endif + DB((dbg, LEVEL_4, "maximum unroll factor %u, to not exceed node limit \n", - loop_info.max_unroll)); + opt_params.max_unrolled_loop_size)); arity = get_irn_arity(loop_head); /* RETURN if we have more than 1 be. */ @@ -1983,13 +2055,12 @@ static ir_node *is_simple_loop(void) /* find value on which loop exit depends */ projx = loop_info.cf_out.pred; cond = get_irn_n(projx, 0); - projres = get_irn_n(cond, 0); - loop_condition = get_irn_n(projres, 0); + cmp = get_irn_n(cond, 0); - if (!is_Cmp(loop_condition)) + if (!is_Cmp(cmp)) return NULL; - DB((dbg, LEVEL_5, "projection is %s\n", get_pnc_string(get_Proj_proj(projx)))); + DB((dbg, LEVEL_5, "projection is %s\n", get_relation_string(get_Proj_proj(projx)))); switch(get_Proj_proj(projx)) { case pn_Cond_false: @@ -2003,8 +2074,7 @@ static ir_node *is_simple_loop(void) } DB((dbg, LEVEL_4, "Valid Cmp.\n")); - - return projres; + return cmp; } /* Returns 1 if all nodes are mode_Iu or mode_Is. */ @@ -2026,19 +2096,27 @@ static unsigned are_mode_I(ir_node *n1, ir_node* n2, ir_node *n3) static unsigned get_unroll_decision_invariant(void) { - ir_node *projres, *loop_condition, *iteration_path; - unsigned success, is_latest_val; - tarval *start_tar, *step_tar; - ir_mode *mode; + ir_node *projres, *loop_condition, *iteration_path; + unsigned success; + ir_tarval *step_tar; + ir_mode *mode; + /* RETURN if loop is not 'simple' */ projres = is_simple_loop(); if (projres == NULL) return 0; + /* Use a minimal size for the invariant unrolled loop, + * as duffs device produces overhead */ + if (loop_info.nodes < opt_params.invar_unrolling_min_size) + return 0; + loop_condition = get_irn_n(projres, 0); success = get_invariant_pred(loop_condition, &loop_info.end_val, &iteration_path); + DB((dbg, LEVEL_4, "pred invar %d\n", success)); + if (! success) return 0; @@ -2048,11 +2126,8 @@ static unsigned get_unroll_decision_invariant(void) * Until now we only have end_val. */ if (is_Add(iteration_path) || is_Sub(iteration_path)) { - /* We test against the latest value of the iv. */ - is_latest_val = 1; - loop_info.add = iteration_path; - DB((dbg, LEVEL_4, "Got add %N (maybe not sane)\n", loop_info.add)); + DB((dbg, LEVEL_4, "Case 1: Got add %N (maybe not sane)\n", loop_info.add)); /* Preds of the add should be step and the iteration_phi */ success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi); @@ -2072,16 +2147,13 @@ static unsigned get_unroll_decision_invariant(void) if (! success) return 0; - DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val)); + DB((dbg, LEVEL_4, "Got start A %N\n", loop_info.start_val)); } else if (is_Phi(iteration_path)) { ir_node *new_iteration_phi; - /* We compare with the value the iv had entering this run. */ - is_latest_val = 0; - loop_info.iteration_phi = iteration_path; - DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi)); + DB((dbg, LEVEL_4, "Case 2: Got phi %N\n", loop_info.iteration_phi)); /* Find start_val and add-node. * Does necessary sanity check of add, if it is already set. */ @@ -2089,14 +2161,14 @@ static unsigned get_unroll_decision_invariant(void) if (! success) return 0; - DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val)); + DB((dbg, LEVEL_4, "Got start B %N\n", loop_info.start_val)); DB((dbg, LEVEL_4, "Got add or sub %N\n", loop_info.add)); success = get_const_pred(loop_info.add, &loop_info.step, &new_iteration_phi); if (! success) return 0; - DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step)); + DB((dbg, LEVEL_4, "Got step (B) %N\n", loop_info.step)); if (loop_info.iteration_phi != new_iteration_phi) return 0; @@ -2120,7 +2192,6 @@ static unsigned get_unroll_decision_invariant(void) DB((dbg, LEVEL_4, "mode integer\n")); step_tar = get_Const_tarval(loop_info.step); - start_tar = get_Const_tarval(loop_info.start_val); if (tarval_is_null(step_tar)) { /* TODO Might be worth a warning. */ @@ -2136,9 +2207,9 @@ static unsigned get_unroll_decision_invariant(void) /* Returns unroll factor, * given maximum unroll factor and number of loop passes. */ -static unsigned get_preferred_factor_constant(tarval *count_tar) +static unsigned get_preferred_factor_constant(ir_tarval *count_tar) { - tarval *tar_6, *tar_5, *tar_4, *tar_3, *tar_2; + ir_tarval *tar_6, *tar_5, *tar_4, *tar_3, *tar_2; unsigned prefer; ir_mode *mode = get_irn_mode(loop_info.end_val); @@ -2220,19 +2291,22 @@ static unsigned get_preferred_factor_constant(tarval *count_tar) } /* Check if cur_loop is a simple counting loop. - * Start, step and end are constants. */ + * Start, step and end are constants. + * TODO The whole constant case should use procedures similar to + * the invariant case, as they are more versatile. */ /* TODO split. */ static unsigned get_unroll_decision_constant(void) { - ir_node *projres, *loop_condition, *iteration_path; - unsigned success, is_latest_val; - tarval *start_tar, *end_tar, *step_tar, *diff_tar, *count_tar, *stepped; - pn_Cmp proj_proj, norm_proj; - ir_mode *mode; + ir_node *cmp, *iteration_path; + unsigned success, is_latest_val; + ir_tarval *start_tar, *end_tar, *step_tar, *diff_tar, *count_tar; + ir_tarval *stepped; + ir_relation proj_proj, norm_proj; + ir_mode *mode; /* RETURN if loop is not 'simple' */ - projres = is_simple_loop(); - if (projres == NULL) + cmp = is_simple_loop(); + if (cmp == NULL) return 0; /* One in of the loop condition needs to be loop invariant. => end_val @@ -2250,9 +2324,7 @@ static unsigned get_unroll_decision_constant(void) /\ */ - loop_condition = get_irn_n(projres, 0); - - success = get_const_pred(loop_condition, &loop_info.end_val, &iteration_path); + success = get_const_pred(cmp, &loop_info.end_val, &iteration_path); if (! success) return 0; @@ -2266,7 +2338,7 @@ static unsigned get_unroll_decision_constant(void) is_latest_val = 1; loop_info.add = iteration_path; - DB((dbg, LEVEL_4, "Got add %N (maybe not sane)\n", loop_info.add)); + DB((dbg, LEVEL_4, "Case 2: Got add %N (maybe not sane)\n", loop_info.add)); /* Preds of the add should be step and the iteration_phi */ success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi); @@ -2295,7 +2367,7 @@ static unsigned get_unroll_decision_constant(void) is_latest_val = 0; loop_info.iteration_phi = iteration_path; - DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi)); + DB((dbg, LEVEL_4, "Case 1: Got phi %N \n", loop_info.iteration_phi)); /* Find start_val and add-node. * Does necessary sanity check of add, if it is already set. */ @@ -2358,12 +2430,12 @@ static unsigned get_unroll_decision_constant(void) /* Iv will not pass end_val (except overflows). * Nothing done, as it would yield to no advantage. */ if (tarval_is_negative(count_tar)) { - DB((dbg, LEVEL_1, "Loop is endless or never taken.")); + DB((dbg, LEVEL_4, "Loop is endless or never taken.")); /* TODO Might be worth a warning. */ return 0; } - count_stats(stats.u_simple_counting_loop); + ++stats.u_simple_counting_loop; loop_info.latest_value = is_latest_val; @@ -2384,17 +2456,16 @@ static unsigned get_unroll_decision_constant(void) DB((dbg, LEVEL_4, "stepped to %ld\n", get_tarval_long(stepped))); - proj_proj = get_Proj_proj(projres); + proj_proj = get_Cmp_relation(cmp); /* Assure that norm_proj is the stay-in-loop case. */ if (loop_info.exit_cond == 1) - norm_proj = get_math_inverted_case(proj_proj); + norm_proj = get_negated_relation(proj_proj); else norm_proj = proj_proj; - DB((dbg, LEVEL_4, "normalized projection %s\n", get_pnc_string(norm_proj))); - + DB((dbg, LEVEL_4, "normalized projection %s\n", get_relation_string(norm_proj))); /* Executed at most once (stay in counting loop if a Eq b) */ - if (norm_proj == pn_Cmp_Eq) + if (norm_proj == ir_relation_equal) /* TODO Might be worth a warning. */ return 0; @@ -2427,6 +2498,24 @@ static unsigned get_unroll_decision_constant(void) */ static void unroll_loop(void) { + + if (! (loop_info.nodes > 0)) + return; + + if (loop_info.nodes > opt_params.max_unrolled_loop_size) { + DB((dbg, LEVEL_2, "Nodes %d > allowed nodes %d\n", + loop_info.nodes, opt_params.max_unrolled_loop_size)); + ++stats.too_large; + return; + } + + if (loop_info.calls > 0) { + DB((dbg, LEVEL_2, "Calls %d > allowed calls 0\n", + loop_info.calls)); + ++stats.calls_limit; + return; + } + unroll_nr = 0; /* get_unroll_decision_constant and invariant are completely @@ -2447,7 +2536,7 @@ static void unroll_loop(void) loop_info.unroll_kind = invariant; } - DB((dbg, LEVEL_1, " *** Unrolling %d times ***\n", unroll_nr)); + DB((dbg, LEVEL_2, " *** Unrolling %d times ***\n", unroll_nr)); if (unroll_nr > 1) { loop_entries = NEW_ARR_F(entry_edge, 0); @@ -2455,13 +2544,18 @@ static void unroll_loop(void) /* Get loop outs */ irg_walk_graph(current_ir_graph, get_loop_entries, NULL, NULL); - if ((int)get_tarval_long(loop_info.count_tar) == unroll_nr) - loop_info.needs_backedge = 0; - else + if (loop_info.unroll_kind == constant) { + if ((int)get_tarval_long(loop_info.count_tar) == unroll_nr) + loop_info.needs_backedge = 0; + else + loop_info.needs_backedge = 1; + } else { loop_info.needs_backedge = 1; + } /* Use phase to keep copy of nodes from the condition chain. */ - phase = new_phase(current_ir_graph, phase_irn_init_default); + ir_nodemap_init(&map, current_ir_graph); + obstack_init(&obst); /* Copies the loop */ copy_loop(loop_entries, unroll_nr - 1); @@ -2469,90 +2563,43 @@ static void unroll_loop(void) /* Line up the floating copies. */ place_copies(unroll_nr - 1); - /* Remove phis with 1 in*/ + /* Remove phis with 1 in + * If there were no nested phis, this would not be necessary. + * Avoiding the creation in the first place + * leads to complex special cases. */ irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL); - /* dump_ir_block_graph(current_ir_graph, "-DONE"); */ - if (loop_info.unroll_kind == constant) - count_stats(stats.constant_unroll); + ++stats.constant_unroll; else - count_stats(stats.invariant_unroll); + ++stats.invariant_unroll; - set_irg_doms_inconsistent(current_ir_graph); - set_irg_loopinfo_inconsistent(current_ir_graph); - /* TODO is it? */ - set_irg_outs_inconsistent(current_ir_graph); + clear_irg_state(current_ir_graph, IR_GRAPH_STATE_CONSISTENT_DOMINANCE); DEL_ARR_F(loop_entries); + obstack_free(&obst, NULL); + ir_nodemap_destroy(&map); } } /* Analyzes the loop, and checks if size is within allowed range. * Decides if loop will be processed. */ -static void init_analyze(ir_loop *loop) +static void init_analyze(ir_graph *irg, ir_loop *loop) { - /* Expect no benefit of big loops. */ - /* TODO tuning/make parameter */ - int loop_depth; - unsigned max_loop_nodes = opt_params.max_loop_size; - unsigned max_loop_nodes_adapted; - int depth_adaption = opt_params.depth_adaption; - cur_loop = loop; - loop_head = NULL; - loop_head_valid = 1; + loop_head = NULL; + loop_head_valid = true; /* Reset loop info */ memset(&loop_info, 0, sizeof(loop_info_t)); - DB((dbg, LEVEL_1, " >>>> current loop includes node %N <<<\n", - get_loop_node(loop, 0))); + DB((dbg, LEVEL_1, " >>>> current loop %ld <<<\n", + get_loop_loop_nr(loop))); /* Collect loop informations: head, node counts. */ - irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL); - - /* Depth of 0 is the procedure and 1 a topmost loop. */ - loop_depth = get_loop_depth(loop) - 1; - - /* Calculating in per mil. */ - max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth); - - DB((dbg, LEVEL_1, "max_nodes: %d\nmax_nodes_adapted %d at depth of %d (adaption %d)\n", - max_loop_nodes, max_loop_nodes_adapted, loop_depth, depth_adaption)); - - if (! (loop_info.nodes > 0)) - return; - -#if LOOP_IGNORE_NODE_LIMITS - DB((dbg, LEVEL_1, "WARNING: Loop node limitations ignored.")); -#else - if (loop_info.nodes > max_loop_nodes) { - /* Only for stats */ - DB((dbg, LEVEL_1, "Nodes %d > allowed nodes %d\n", - loop_info.nodes, loop_depth, max_loop_nodes)); - count_stats(stats.too_large); - /* no RETURN */ - /* Adaption might change it */ - } - - /* Limit processing to loops smaller than given parameter. */ - if (loop_info.nodes > max_loop_nodes_adapted) { - DB((dbg, LEVEL_1, "Nodes %d > allowed nodes (depth %d adapted) %d\n", - loop_info.nodes, loop_depth, max_loop_nodes_adapted)); - count_stats(stats.too_large_adapted); - return; - } - - if (loop_info.calls > opt_params.allowed_calls) { - DB((dbg, LEVEL_1, "Calls %d > allowed calls %d\n", - loop_info.calls, max_calls)); - count_stats(stats.calls_limit); - return; - } -#endif + irg_walk_graph(irg, get_loop_info, NULL, NULL); /* RETURN if there is no valid head */ if (!loop_head || !loop_head_valid) { @@ -2562,9 +2609,16 @@ static void init_analyze(ir_loop *loop) DB((dbg, LEVEL_1, "Loophead: %N\n", loop_head)); } + if (loop_info.branches > opt_params.max_branches) { + DB((dbg, LEVEL_1, "Branches %d > allowed branches %d\n", + loop_info.branches, opt_params.max_branches)); + ++stats.calls_limit; + return; + } + switch (loop_op) { case loop_op_inversion: - loop_inversion(); + loop_inversion(irg); break; case loop_op_unrolling: @@ -2574,44 +2628,57 @@ static void init_analyze(ir_loop *loop) default: panic("Loop optimization not implemented."); } - DB((dbg, LEVEL_1, " <<<< end of loop with node %N >>>>\n", - get_loop_node(loop, 0))); + DB((dbg, LEVEL_1, " <<<< end of loop with node %ld >>>>\n", + get_loop_loop_nr(loop))); } /* Find innermost loops and add them to loops. */ static void find_innermost_loop(ir_loop *loop) { - /* descend into sons */ - int sons = get_loop_n_sons(loop); - - if (sons == 0) { - ARR_APP1(ir_loop *, loops, loop); - } else { - int s; - for (s=0; s>> unrolling (Startnode %N) <<<\n", - get_irg_start(irg))); - loop_optimization(irg); - - DB((dbg, LEVEL_1, " >>> unrolling done (Startnode %N) <<<\n", - get_irg_start(irg))); + return 0; } -void do_loop_inversion(ir_graph *irg) +static ir_graph_state_t perform_loop_inversion(ir_graph *irg) { loop_op = loop_op_inversion; - - DB((dbg, LEVEL_1, " >>> inversion (Startnode %N) <<<\n", - get_irg_start(irg))); - loop_optimization(irg); - - DB((dbg, LEVEL_1, " >>> inversion done (Startnode %N) <<<\n", - get_irg_start(irg))); + return 0; } -void do_loop_peeling(ir_graph *irg) +static ir_graph_state_t perform_loop_peeling(ir_graph *irg) { loop_op = loop_op_peeling; + loop_optimization(irg); + return 0; +} - DB((dbg, LEVEL_1, " >>> peeling (Startnode %N) <<<\n", - get_irg_start(irg))); +static optdesc_t opt_unroll_loops = { + "unroll-loops", + IR_GRAPH_STATE_CONSISTENT_OUT_EDGES | IR_GRAPH_STATE_CONSISTENT_OUTS | IR_GRAPH_STATE_CONSISTENT_LOOPINFO, + perform_loop_unrolling, +}; - loop_optimization(irg); +static optdesc_t opt_invert_loops = { + "invert-loops", + IR_GRAPH_STATE_CONSISTENT_OUT_EDGES | IR_GRAPH_STATE_CONSISTENT_OUTS | IR_GRAPH_STATE_CONSISTENT_LOOPINFO, + perform_loop_inversion, +}; - DB((dbg, LEVEL_1, " >>> peeling done (Startnode %N) <<<\n", - get_irg_start(irg))); +static optdesc_t opt_peel_loops = { + "peel-loops", + IR_GRAPH_STATE_CONSISTENT_OUT_EDGES | IR_GRAPH_STATE_CONSISTENT_OUTS | IR_GRAPH_STATE_CONSISTENT_LOOPINFO, + perform_loop_peeling, +}; -} +void do_loop_unrolling(ir_graph *irg) +{ perform_irg_optimization(irg, &opt_unroll_loops); } + +void do_loop_inversion(ir_graph *irg) +{ perform_irg_optimization(irg, &opt_invert_loops); } + +void do_loop_peeling(ir_graph *irg) +{ perform_irg_optimization(irg, &opt_peel_loops); } ir_graph_pass_t *loop_inversion_pass(const char *name) {