X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Floop.c;h=7d88ca56d00356bcb7fd32e923a8e43aebebea5c;hb=6f068af98daa4725d60e5d23a8f98ec2841cfa44;hp=918d0fc225e9f4c5f19156d85324d1354dba934c;hpb=43aca1df83b9862e00da7d604c09521a0aabe770;p=libfirm diff --git a/ir/opt/loop.c b/ir/opt/loop.c index 918d0fc22..7d88ca56d 100644 --- a/ir/opt/loop.c +++ b/ir/opt/loop.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2010 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -24,7 +24,6 @@ * * @version $Id$ */ - #include "config.h" #include "iroptimize.h" @@ -45,6 +44,7 @@ #include "irpass.h" #include "irdom.h" +#include #include "irbackedge_t.h" #include "irphase_t.h" #include "irloop_t.h" @@ -53,7 +53,7 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg); /* DBG print stats for every procedure. */ -#define LOOP_OPT_STATS 0 +#define LOOP_OPT_STATS 1 /* DBG: Ignore node limits and process every possible loop. */ #define LOOP_IGNORE_NODE_LIMITS 0 @@ -91,7 +91,6 @@ typedef struct entry_edge { /* Node info for unrolling. */ typedef struct unrolling_node_info { ir_node **copies; - /*ir_node **ins;*/ } unrolling_node_info; /* Outs of the nodes head. */ @@ -159,16 +158,19 @@ static void do_print_stats(void) /* Commandline parameters */ typedef struct loop_opt_params_t { - unsigned max_loop_size; /* Maximum number of nodes */ - int depth_adaption; /* Loop nest depth adaption */ - unsigned allowed_calls; /* Number of calls allowed */ - unsigned count_phi:1; /* Count phi nodes */ - unsigned count_proj:1; /* Count projections */ +unsigned max_loop_size; /* Maximum number of nodes [nodes]*/ +int depth_adaption; /* Loop nest depth adaption [percent] */ +unsigned allowed_calls; /* Number of calls allowed [number] */ +unsigned count_phi:1; /* Count phi nodes */ +unsigned count_proj:1; /* Count projections */ - unsigned max_cc_size; /* Maximum condition chain size */ +unsigned max_cc_size; /* Maximum condition chain size [nodes] */ +unsigned max_branches; - unsigned allow_const_unrolling:1; - unsigned allow_invar_unrolling:1; +unsigned max_unrolled_loop_size; /* [nodes] */ +unsigned allow_const_unrolling:1; +unsigned allow_invar_unrolling:1; +unsigned invar_unrolling_min_size; /* [nodes] */ } loop_opt_params_t; @@ -176,22 +178,23 @@ static loop_opt_params_t opt_params; /* Loop analysis informations */ typedef struct loop_info_t { - unsigned nodes; /* node count */ - unsigned ld_st; /* load and store nodes */ - unsigned calls; /* number of calls */ - unsigned cf_outs; /* number of cf edges which leave the loop */ - entry_edge cf_out; /* single loop leaving cf edge */ - int be_src_pos; /* position of the single own backedge in the head */ + unsigned nodes; /* node count */ + unsigned ld_st; /* load and store nodes */ + unsigned branches; /* number of conditions */ + unsigned calls; /* number of calls */ + unsigned cf_outs; /* number of cf edges which leave the loop */ + entry_edge cf_out; /* single loop leaving cf edge */ + int be_src_pos; /* position of the single own backedge in the head */ /* for inversion */ - unsigned cc_size; /* nodes in the condition chain */ + unsigned cc_size; /* nodes in the condition chain */ /* for unrolling */ - unsigned max_unroll; /* Number of unrolls satisfying max_loop_size */ - unsigned exit_cond; /* 1 if condition==true exits the loop. */ - unsigned latest_value:1; /* 1 if condition is checked against latest counter value */ - unsigned needs_backedge:1; /* 0 if loop is completely unrolled */ - unsigned decreasing:1; /* Step operation is_Sub, or step is<0 */ + unsigned max_unroll; /* Number of unrolls satisfying max_loop_size */ + unsigned exit_cond; /* 1 if condition==true exits the loop. */ + unsigned latest_value:1; /* 1 if condition is checked against latest counter value */ + unsigned needs_backedge:1; /* 0 if loop is completely unrolled */ + unsigned decreasing:1; /* Step operation is_Sub, or step is<0 */ /* IV informations of a simple loop */ ir_node *start_val; @@ -200,10 +203,10 @@ typedef struct loop_info_t { ir_node *iteration_phi; ir_node *add; - tarval *count_tar; /* Number of loop iterations */ + ir_tarval *count_tar; /* Number of loop iterations */ - ir_node *duff_cond; /* Duff mod */ - unrolling_kind_flag unroll_kind; /* constant or invariant unrolling */ + ir_node *duff_cond; /* Duff mod */ + unrolling_kind_flag unroll_kind; /* constant or invariant unrolling */ } loop_info_t; /* Information about the current loop */ @@ -237,19 +240,13 @@ typedef enum loop_op_t { /* Saves which loop operation to do until after basic tests. */ static loop_op_t loop_op; -/************************************************************************/ - /* Returns the maximum nodes for the given nest depth */ static unsigned get_max_nodes_adapted(unsigned depth) { - int adapt_permil = opt_params.depth_adaption * depth; - unsigned permil_change; - - if (adapt_permil < -1000) - return 0; + double perc = 100.0 + (double)opt_params.depth_adaption; + double factor = pow(perc / 100.0, depth); - permil_change = 1000 + adapt_permil; - return (opt_params.max_loop_size * permil_change) / 1000; + return (int)((double)opt_params.max_loop_size * factor); } /* Reset nodes link. For use with a walker. */ @@ -286,6 +283,17 @@ static void get_loop_info(ir_node *node, void *env) pred_in_loop = is_in_loop(pred); node_in_loop = is_in_loop(node); + if (!node_in_loop && pred_in_loop && is_Block(node)) + { + entry_edge entry; + entry.node = node; + entry.pos = i; + entry.pred = pred; + /* Count cf outs */ + ++loop_info.cf_outs; + loop_info.cf_out = entry; + } + /* collect some loop information */ if (node_in_loop) { if (is_Phi(node) && opt_params.count_phi) @@ -300,20 +308,34 @@ static void get_loop_info(ir_node *node, void *env) if (is_Call(node)) ++loop_info.calls; + } /* Find the loops head/the blocks with cfpred outside of the loop */ - if (is_Block(node) && node_in_loop && !pred_in_loop && loop_head_valid) { - ir_node *cfgpred = get_Block_cfgpred(node, i); - - if (!is_in_loop(cfgpred)) { - DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", - node, pred)); - /* another head? We do not touch this. */ - if (loop_head && loop_head != node) { - loop_head_valid = 0; - } else { - loop_head = node; + if (is_Block(node)) { + const ir_edge_t *edge; + unsigned outs_n = 0; + + /* Count innerloop branches */ + foreach_out_edge_kind(node, edge, EDGE_KIND_NORMAL) { + if (is_Block(get_edge_src_irn(edge)) && is_in_loop(get_edge_src_irn(edge))) + ++outs_n; + } + if (outs_n > 1) + ++loop_info.branches; + + if (node_in_loop && !pred_in_loop && loop_head_valid) { + ir_node *cfgpred = get_Block_cfgpred(node, i); + + if (!is_in_loop(cfgpred)) { + DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", + node, pred)); + /* another head? We do not touch this. */ + if (loop_head && loop_head != node) { + loop_head_valid = 0; + } else { + loop_head = node; + } } } } @@ -341,11 +363,6 @@ static void get_loop_entries(ir_node *node, void *env) entry.pos = i; entry.pred = pred; ARR_APP1(entry_edge, loop_entries, entry); - /* Count cf outs */ - if (is_Block(node)) { - ++loop_info.cf_outs; - loop_info.cf_out = entry; - } } } } @@ -361,7 +378,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int fi { int i; int n_cfgpreds; - ir_graph *irg; + ir_graph *irg = get_irn_irg(block); ir_node *phi; ir_node **in; @@ -371,7 +388,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int fi * Dead and bad blocks. */ if (get_irn_arity(block) < 1 || is_Bad(block)) { DB((dbg, LEVEL_5, "ssa bad %N\n", block)); - return new_Bad(); + return new_r_Bad(irg, mode); } if (block == ssa_second_def_block && !first) { @@ -386,7 +403,6 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int fi return value; } - irg = get_irn_irg(block); assert(block != get_irg_start_block(irg)); /* a Block with only 1 predecessor needs no Phi */ @@ -407,7 +423,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, int fi /* create a new Phi */ NEW_ARR_A(ir_node*, in, n_cfgpreds); for (i = 0; i < n_cfgpreds; ++i) - in[i] = new_Unknown(mode); + in[i] = new_r_Dummy(irg, mode); phi = new_r_Phi(block, n_cfgpreds, in, mode); /* Important: always keep block phi list up to date. */ @@ -565,22 +581,43 @@ static unsigned is_nodes_block_marked(ir_node* node) /* Extends a nodes ins by node new. * NOTE: This is slow if a node n needs to be extended more than once. */ -static int extend_irn(ir_node *n, ir_node *new) +static void extend_irn(ir_node *n, ir_node *newnode, int new_is_backedge) { ir_node **ins; int i; int arity = get_irn_arity(n); int new_arity = arity + 1; + int *bes; + NEW_ARR_A(int, bes, new_arity); NEW_ARR_A(ir_node *, ins, new_arity); + /* save bes */ + /* Bes are important! + * Another way would be recreating the looptree, + * but after that we cannot distinguish already processed loops + * from not yet processed ones. */ + if (is_Block(n)) { + for(i = 0; i < arity; ++i) { + bes[i] = is_backedge(n, i); + } + bes[i] = new_is_backedge; + } + for(i = 0; i < arity; ++i) { ins[i] = get_irn_n(n, i); } - ins[i] = new; + ins[i] = newnode; set_irn_in(n, new_arity, ins); - return arity; + + /* restore bes */ + if (is_Block(n)) { + for(i = 0; i < new_arity; ++i) { + if (bes[i]) + set_backedge(n, i); + } + } } /* Extends a block by a copy of its pred at pos, @@ -596,7 +633,7 @@ static void extend_ins_by_copy(ir_node *block, int pos) pred = get_irn_n(block, pos); new_in = get_inversion_copy(pred); DB((dbg, LEVEL_5, "Extend block %N by %N cp of %N\n", block, new_in, pred)); - extend_irn(block, new_in); + extend_irn(block, new_in, 0); /* Extend block phis by copy of definition at pos */ for_each_phi(block, phi) { @@ -612,7 +649,7 @@ static void extend_ins_by_copy(ir_node *block, int pos) new_in = cp; DB((dbg, LEVEL_5, "Extend phi %N by %N cp of %N\n", phi, new_in, pred)); - extend_irn(phi, new_in); + extend_irn(phi, new_in, 0); } } @@ -623,7 +660,7 @@ static int get_backedge_n(ir_node *block, unsigned with_alien) int be_n = 0; int arity = get_irn_arity(block); - assert(is_Block(block) && "We only required backedges of blocks."); + assert(is_Block(block)); for (i = 0; i < arity; ++i) { ir_node *pred = get_irn_n(block, i); @@ -664,7 +701,7 @@ static ir_node *copy_node(ir_node *node) * Order of ins is important for later usage. */ static void copy_walk(ir_node *node, walker_condition *walk_condition, - ir_loop *set_loop) + ir_loop *set_loop) { int i; int arity; @@ -746,8 +783,8 @@ static void copy_walk(ir_node *node, walker_condition *walk_condition, * Order of ins is important for later usage. * Takes copy_index, to phase-link copy at specific index. */ -static void copy_walk_n(ir_node *node, - walker_condition *walk_condition, int copy_index) +static void copy_walk_n(ir_node *node, walker_condition *walk_condition, + int copy_index) { int i; int arity; @@ -821,8 +858,8 @@ static void copy_walk_n(ir_node *node, /* Removes alle Blocks with non marked predecessors from the condition chain. */ static void unmark_not_allowed_cc_blocks(void) { - int blocks = ARR_LEN(cc_blocks); - int i; + size_t blocks = ARR_LEN(cc_blocks); + size_t i; for(i = 0; i < blocks; ++i) { ir_node *block = cc_blocks[i]; @@ -846,20 +883,22 @@ static void unmark_not_allowed_cc_blocks(void) } } -/* Unmarks all cc blocks using cc_blocks except head. */ +/* Unmarks all cc blocks using cc_blocks except head. + * TODO: invert head for unrolling? */ static void unmark_cc_blocks(void) { - int blocks = ARR_LEN(cc_blocks); - int i; + size_t blocks = ARR_LEN(cc_blocks); + size_t i; for(i = 0; i < blocks; ++i) { ir_node *block = cc_blocks[i]; - /* Head is an exception. */ - if (block != loop_head) - set_Block_mark(block, 0); + /* TODO Head is an exception. */ + /*if (block != loop_head)*/ + set_Block_mark(block, 0); } - inversion_blocks_in_cc = 1; + /*inversion_blocks_in_cc = 1;*/ + inversion_blocks_in_cc = 0; /* invalidate */ loop_info.cc_size = 0; @@ -911,7 +950,7 @@ static void get_head_outs(ir_node *node, void *env) } /** - * Find condition chains, and add them to be inverted + * Find condition chains, and add them to be inverted. * A block belongs to the chain if a condition branches out of the loop. * (Some blocks need to be removed once again.) * Returns 1 if the given block belongs to the condition chain. @@ -980,7 +1019,7 @@ static unsigned find_condition_chain(ir_node *block) * / A* B / | * / /\ / ? | * / C* => D | - * / D Head | + * / D Head | * / A \_| * C */ @@ -1007,7 +1046,7 @@ static unsigned find_condition_chain(ir_node *block) } /** - * Rewires the copied condition chain. Removes backedges. + * Rewires the copied condition chain. Removes backedges * as this condition chain is prior to the loop. * Copy of loop_head must have phi list and old (unfixed) backedge info of the loop head. * (loop_head is already fixed, we cannot rely on it.) @@ -1018,10 +1057,11 @@ static void fix_copy_inversion(void) ir_node **ins; ir_node **phis; ir_node *phi, *next; - ir_node *head_cp = get_inversion_copy(loop_head); - int arity = get_irn_arity(head_cp); - int backedges = get_backedge_n(head_cp, 0); - int new_arity = arity - backedges; + ir_node *head_cp = get_inversion_copy(loop_head); + ir_graph *irg = get_irn_irg(head_cp); + int arity = get_irn_arity(head_cp); + int backedges = get_backedge_n(head_cp, 0); + int new_arity = arity - backedges; int pos; int i; @@ -1034,7 +1074,7 @@ static void fix_copy_inversion(void) ins[pos++] = get_irn_n(head_cp, i); } - new_head = new_Block(new_arity, ins); + new_head = new_r_Block(irg, new_arity, ins); phis = NEW_ARR_F(ir_node *, 0); @@ -1062,6 +1102,7 @@ static void fix_copy_inversion(void) DEL_ARR_F(phis); } + /* Puts the original condition chain at the end of the loop, * subsequently to the body. * Relies on block phi list and correct backedges. @@ -1072,9 +1113,10 @@ static void fix_head_inversion(void) ir_node **ins; ir_node *phi, *next; ir_node **phis; - int arity = get_irn_arity(loop_head); - int backedges = get_backedge_n(loop_head, 0); - int new_arity = backedges; + ir_graph *irg = get_irn_irg(loop_head); + int arity = get_irn_arity(loop_head); + int backedges = get_backedge_n(loop_head, 0); + int new_arity = backedges; int pos; int i; @@ -1087,7 +1129,7 @@ static void fix_head_inversion(void) ins[pos++] = get_irn_n(loop_head, i); } - new_head = new_Block(new_arity, ins); + new_head = new_r_Block(irg, new_arity, ins); phis = NEW_ARR_F(ir_node *, 0); @@ -1105,12 +1147,11 @@ static void fix_head_inversion(void) /* If assignment is in the condition chain, * we need to create a phi in the new loop head. * This can only happen for df, not cf. See find_condition_chains. */ - if (is_nodes_block_marked(pred)) { - /* Cannot do this here. */ - ins[pos++] = pred; /*fix_inner_cc_definitions(phi, pred);*/ - } else { + /*if (is_nodes_block_marked(pred)) { ins[pos++] = pred; - } + } else {*/ + ins[pos++] = pred; + } } @@ -1120,7 +1161,7 @@ static void fix_head_inversion(void) ARR_APP1(ir_node *, phis, new_phi); - DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N (arity %d)\n", phi, new_phi, pos )); + DB((dbg, LEVEL_5, "fix inverted head should exch %N by %N (pos %d)\n", phi, new_phi, pos )); } pos = 0; @@ -1139,7 +1180,7 @@ static void fix_head_inversion(void) /* Does the loop inversion. */ static void inversion_walk(entry_edge *head_entries) { - int i; + size_t i; /* * The order of rewiring bottom-up is crucial. @@ -1233,14 +1274,60 @@ static void inversion_walk(entry_edge *head_entries) /* 5. Remove the backedges of the copied condition chain, * because it is going to be the new 'head' in advance to the loop. */ fix_copy_inversion(); + } /* Performs loop inversion of cur_loop if possible and reasonable. */ static void loop_inversion(void) { + int loop_depth; + unsigned max_loop_nodes = opt_params.max_loop_size; + unsigned max_loop_nodes_adapted; + int depth_adaption = opt_params.depth_adaption; + unsigned do_inversion = 1; unsigned has_cc = 0; + /* Depth of 0 is the procedure and 1 a topmost loop. */ + loop_depth = get_loop_depth(cur_loop) - 1; + + /* Calculating in per mil. */ + max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth); + + DB((dbg, LEVEL_1, "max_nodes: %d\nmax_nodes_adapted %d at depth of %d (adaption %d)\n", + max_loop_nodes, max_loop_nodes_adapted, loop_depth, depth_adaption)); + + if (! (loop_info.nodes > 0)) + return; + +#if LOOP_IGNORE_NODE_LIMITS + DB((dbg, LEVEL_1, "WARNING: Loop node limitations ignored.")); +#else + if (loop_info.nodes > max_loop_nodes) { + /* Only for stats */ + DB((dbg, LEVEL_1, "Nodes %d > allowed nodes %d\n", + loop_info.nodes, loop_depth, max_loop_nodes)); + count_stats(stats.too_large); + /* no RETURN */ + /* Adaption might change it */ + } + + /* Limit processing to loops smaller than given parameter. */ + if (loop_info.nodes > max_loop_nodes_adapted) { + DB((dbg, LEVEL_1, "Nodes %d > allowed nodes (depth %d adapted) %d\n", + loop_info.nodes, loop_depth, max_loop_nodes_adapted)); + count_stats(stats.too_large_adapted); + return; + } + + if (loop_info.calls > opt_params.allowed_calls) { + DB((dbg, LEVEL_1, "Calls %d > allowed calls %d\n", + loop_info.calls, opt_params.allowed_calls)); + count_stats(stats.calls_limit); + return; + } +#endif + /*inversion_head_node_limit = INT_MAX;*/ ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); @@ -1271,18 +1358,17 @@ static void loop_inversion(void) #else /* Condition chain too large. * Loop should better be small enough to fit into the cache. */ - /* FIXME Of course, we should take a small enough cc in the first place, + /* TODO Of course, we should take a small enough cc in the first place, * which is not that simple. (bin packing) */ if (loop_info.cc_size > opt_params.max_cc_size) { count_stats(stats.cc_limit_reached); - /* Only head taken? */ - if (inversion_blocks_in_cc == 1) - do_inversion = 0; - else - /* Unmark cc blocks except the head. - * Invert head only for possible unrolling. */ - unmark_cc_blocks(); + do_inversion = 0; + + /* Unmark cc blocks except the head. + * Invert head only for possible unrolling. */ + unmark_cc_blocks(); + } #endif @@ -1338,17 +1424,19 @@ static void unrolling_fix_loop_head_inv(void) ins[0] = loop_condition; ins[1] = proj; - set_irn_in(loop_head, 2, ins); + DB((dbg, LEVEL_4, "Rewire ins of block loophead %N to pred %N and duffs entry %N \n" , loop_head, ins[0], ins[1])); for_each_phi(loop_head, phi) { ir_node *pred = get_irn_n(phi, loop_info.be_src_pos); + /* TODO we think it is a phi, but for Mergesort it is not the case.*/ + ir_node *last_pred = get_unroll_copy(pred, unroll_nr - 1); ins[0] = last_pred; - ins[1] = get_irn_link(phi); - + ins[1] = (ir_node*)get_irn_link(phi); set_irn_in(phi, 2, ins); + DB((dbg, LEVEL_4, "Rewire ins of loophead phi %N to pred %N and duffs entry %N \n" , phi, ins[0], ins[1])); } } @@ -1356,6 +1444,7 @@ static void unrolling_fix_loop_head_inv(void) static void correct_phis(ir_node *node, void *env) { (void)env; + if (is_Phi(node) && get_irn_arity(node) == 1) { ir_node *exch; ir_node *in[1]; @@ -1364,7 +1453,7 @@ static void correct_phis(ir_node *node, void *env) exch = new_rd_Phi(get_irn_dbg_info(node), get_nodes_block(node), 1, in, - get_irn_mode(node)); + get_irn_mode(node)); exchange(node, exch); } @@ -1374,7 +1463,8 @@ static void correct_phis(ir_node *node, void *env) static void place_copies(int copies) { ir_node *loophead = loop_head; - int c, i; + size_t i; + int c; int be_src_pos = loop_info.be_src_pos; /* Serialize loops by fixing their head ins. @@ -1416,14 +1506,16 @@ static void place_copies(int copies) } else { /* Invariant case */ /* Every node has 2 ins. One from the duff blocks - * and one from the previous unrolled loop. */ + * and one from the previously unrolled loop. */ ir_node *ins[2]; /* Calculate corresponding projection of mod result for this copy c */ ir_node *proj = new_Proj(loop_info.duff_cond, mode_X, unroll_nr - c - 1); + DB((dbg, LEVEL_4, "New duff proj %N\n" , proj)); ins[0] = new_jmp; ins[1] = proj; - set_irn_in(lower, 1, ins); + set_irn_in(lower, 2, ins); + DB((dbg, LEVEL_4, "Rewire ins of Block %N to pred %N and duffs entry %N \n" , lower, ins[0], ins[1])); for_each_phi(loophead, phi) { ir_node *topmost_phi_pred = get_irn_n(phi, be_src_pos); @@ -1432,8 +1524,10 @@ static void place_copies(int copies) ir_node *duff_phi; lower_phi = get_unroll_copy(phi, c + 1); - duff_phi = get_irn_link(lower_phi); + duff_phi = (ir_node*)get_irn_link(phi); + DB((dbg, LEVEL_4, "DD Link of %N is %N\n" , phi, duff_phi)); + /* */ if (is_in_loop(topmost_phi_pred)) { upper_phi_pred = get_unroll_copy(topmost_phi_pred, c); } else { @@ -1442,13 +1536,13 @@ static void place_copies(int copies) ins[0] = upper_phi_pred; ins[1] = duff_phi; - set_irn_in(lower_phi, 2, ins); + DB((dbg, LEVEL_4, "Rewire ins of %N to pred %N and duffs entry %N \n" , lower_phi, ins[0], ins[1])); } } } - /* Reconnect loop landing pad with last copy. */ + /* Reconnect last copy. */ for (i = 0; i < ARR_LEN(loop_entries); ++i) { entry_edge edge = loop_entries[i]; /* Last copy is at the bottom */ @@ -1470,13 +1564,14 @@ static void place_copies(int copies) ir_node *last_pred; /* It is possible, that the value used - * in the OWN backedge path is NOT defined in this loop. */ + * in the OWN backedge path is NOT assigned in this loop. */ if (is_in_loop(pred)) last_pred = get_unroll_copy(pred, copies); else last_pred = pred; set_irn_n(phi, be_src_pos, last_pred); } + } else { unrolling_fix_loop_head_inv(); } @@ -1485,11 +1580,12 @@ static void place_copies(int copies) /* Copies the cur_loop several times. */ static void copy_loop(entry_edge *cur_loop_outs, int copies) { - int i, c; + int c; ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); for (c = 0; c < copies; ++c) { + size_t i; inc_irg_visited(current_ir_graph); @@ -1508,31 +1604,30 @@ static void copy_loop(entry_edge *cur_loop_outs, int copies) /* Creates a new phi from the given phi node omitting own bes, * using be_block as supplier of backedge informations. */ -static ir_node *clone_phis_sans_bes(ir_node *node, ir_node *be_block) +static ir_node *clone_phis_sans_bes(ir_node *phi, ir_node *be_block, ir_node *dest_block) { ir_node **ins; - int arity = get_irn_arity(node); + int arity = get_irn_arity(phi); int i, c = 0; + ir_node *newphi; - assert(get_irn_arity(node) == get_irn_arity(be_block)); - assert(is_Phi(node)); + assert(get_irn_arity(phi) == get_irn_arity(be_block)); + assert(is_Phi(phi)); ins = NEW_ARR_F(ir_node *, arity); for (i = 0; i < arity; ++i) { if (! is_own_backedge(be_block, i)) { - ins[c] = get_irn_n(node, i); + ins[c] = get_irn_n(phi, i); ++c; } - /* } else { - ir_node *pred = get_inr_n(node, i); - if (! is_in_loop(pred)) { - ins[c] = pred; - ++c; - } - }*/ } - return new_r_Phi(get_nodes_block(node), c, ins, get_irn_mode(node)); + newphi = new_r_Phi(dest_block, c, ins, get_irn_mode(phi)); + + set_irn_link(phi, newphi); + DB((dbg, LEVEL_4, "Linking for duffs device %N to %N\n", phi, newphi)); + + return newphi; } /* Creates a new block from the given block node omitting own bes, @@ -1546,7 +1641,7 @@ static ir_node *clone_block_sans_bes(ir_node *node, ir_node *be_block) assert(get_irn_arity(node) == get_irn_arity(be_block)); assert(is_Block(node)); - ins = NEW_ARR_F(ir_node *, arity); + NEW_ARR_A(ir_node *, ins, arity); for (i = 0; i < arity; ++i) { if (! is_own_backedge(be_block, i)) { ins[c] = get_irn_n(node, i); @@ -1557,6 +1652,21 @@ static ir_node *clone_block_sans_bes(ir_node *node, ir_node *be_block) return new_Block(c, ins); } +/* Creates a structure to calculate absolute value of node op. + * Returns mux node with absolute value. */ +static ir_node *new_Abs(ir_node *op, ir_mode *mode) +{ + ir_graph *irg = get_irn_irg(op); + ir_node *block = get_nodes_block(op); + ir_node *zero = new_r_Const(irg, get_mode_null(mode)); + ir_node *cmp = new_r_Cmp(block, op, zero, ir_relation_less); + ir_node *minus_op = new_r_Minus(block, op, mode); + ir_node *mux = new_r_Mux(block, cmp, op, minus_op, mode); + + return mux; +} + + /* Creates blocks for duffs device, using previously obtained * informations about the iv. * TODO split */ @@ -1565,8 +1675,8 @@ static void create_duffs_block(void) ir_mode *mode; ir_node *block1, *count_block, *duff_block; - ir_node *ems, *ems_divmod, *ems_mod_proj, *cmp_null, - *cmp_proj, *ems_mode_cond, *x_true, *x_false, *const_null; + ir_node *ems, *ems_mod, *ems_div, *ems_mod_proj, *cmp_null, + *ems_mode_cond, *x_true, *x_false, *const_null; ir_node *true_val, *false_val; ir_node *ins[2]; @@ -1574,6 +1684,7 @@ static void create_duffs_block(void) ir_node *count, *correction, *unroll_c; ir_node *cmp_bad_count, *good_count, *bad_count, *count_phi, *bad_count_neg; + ir_node *phi; mode = get_irn_mode(loop_info.end_val); const_null = new_Const(get_mode_null(mode)); @@ -1582,37 +1693,50 @@ static void create_duffs_block(void) * 1. Calculate first approach to count. * Condition: (end - start) % step == 0 */ block1 = clone_block_sans_bes(loop_head, loop_head); + DB((dbg, LEVEL_4, "Duff block 1 %N\n", block1)); /* Create loop entry phis in first duff block * as it becomes the loops preheader */ - if (loop_info.unroll_kind == invariant) { - ir_node *phi; - for_each_phi(loop_head, phi) { - ir_node *new_phi = clone_phis_sans_bes(phi, loop_head); - set_nodes_block(new_phi, block1); - } + for_each_phi(loop_head, phi) { + /* Returns phis pred if phi would have arity 1*/ + ir_node *new_phi = clone_phis_sans_bes(phi, loop_head, block1); + + DB((dbg, LEVEL_4, "HEAD %N phi %N\n", loop_head, phi)); + DB((dbg, LEVEL_4, "BLOCK1 %N phi %N\n", block1, new_phi)); } ems = new_r_Sub(block1, loop_info.end_val, loop_info.start_val, get_irn_mode(loop_info.end_val)); + DB((dbg, LEVEL_4, "BLOCK1 sub %N\n", ems)); + + + ems = new_Sub(loop_info.end_val, loop_info.start_val, + get_irn_mode(loop_info.end_val)); - ems_divmod = new_r_DivMod(block1, + DB((dbg, LEVEL_4, "mod ins %N %N\n", ems, loop_info.step)); + ems_mod = new_r_Mod(block1, new_NoMem(), ems, loop_info.step, mode, op_pin_state_pinned); + ems_div = new_r_Div(block1, + new_NoMem(), + ems, + loop_info.step, + mode, + op_pin_state_pinned); + + DB((dbg, LEVEL_4, "New module node %N\n", ems_mod)); - ems_mod_proj = new_r_Proj(ems_divmod, mode, pn_DivMod_res_mod); - cmp_null = new_r_Cmp(block1, ems_mod_proj, const_null); - cmp_proj = new_r_Proj(cmp_null, mode, pn_Cmp_Eq); - ems_mode_cond = new_Cond(cmp_proj); + ems_mod_proj = new_r_Proj(ems_mod, mode_Iu, pn_Mod_res); + cmp_null = new_r_Cmp(block1, ems_mod_proj, const_null, ir_relation_less); + ems_mode_cond = new_r_Cond(block1, cmp_null); /* ems % step == 0 */ - x_true = new_Proj(ems_mode_cond, mode_X, pn_Cond_true); + x_true = new_r_Proj(ems_mode_cond, mode_X, pn_Cond_true); /* ems % step != 0 */ - x_false = new_Proj(ems_mode_cond, mode_X, pn_Cond_false); - + x_false = new_r_Proj(ems_mode_cond, mode_X, pn_Cond_false); /* 2. Second block. * Assures, duffs device receives a valid count. @@ -1624,6 +1748,8 @@ static void create_duffs_block(void) ins[1] = x_false; count_block = new_Block(2, ins); + DB((dbg, LEVEL_4, "Duff block 2 %N\n", count_block)); + /* Increase loop-taken-count depending on the loop condition * uses the latest iv to compare to. */ @@ -1633,7 +1759,7 @@ static void create_duffs_block(void) /* ems % step != 0 : +1 */ false_val = new_Const(get_mode_one(mode)); } else { - tarval *tv_two = new_tarval_from_long(2, mode); + ir_tarval *tv_two = new_tarval_from_long(2, mode); /* ems % step == 0 : +1 */ true_val = new_Const(get_mode_one(mode)); /* ems % step != 0 : +2 */ @@ -1645,13 +1771,11 @@ static void create_duffs_block(void) correction = new_r_Phi(count_block, 2, ins, mode); - count = new_r_Proj(ems_divmod, mode, pn_DivMod_res_div); + count = new_r_Proj(ems_div, mode, pn_Div_res); /* (end - start) / step + correction */ count = new_Add(count, correction, mode); - cmp_bad_count = new_r_Cmp(count_block, count, const_null); - /* We preconditioned the loop to be tail-controlled. * So, if count is something 'wrong' like 0, * negative/positive (depending on step direction), @@ -1660,12 +1784,14 @@ static void create_duffs_block(void) /* Depending on step direction, we have to check for > or < 0 */ if (loop_info.decreasing == 1) { - bad_count_neg = new_r_Proj(cmp_bad_count, mode_X, pn_Cmp_Lt); + cmp_bad_count = new_r_Cmp(count_block, count, const_null, + ir_relation_less); } else { - bad_count_neg = new_r_Proj(cmp_bad_count, mode_X, pn_Cmp_Gt); + cmp_bad_count = new_r_Cmp(count_block, count, const_null, + ir_relation_greater); } - bad_count_neg = new_Cond(bad_count_neg); + bad_count_neg = new_r_Cond(count_block, cmp_bad_count); good_count = new_Proj(bad_count_neg, mode_X, pn_Cond_true); bad_count = new_Proj(ems_mode_cond, mode_X, pn_Cond_false); @@ -1675,15 +1801,10 @@ static void create_duffs_block(void) ins[0] = good_count; ins[1] = bad_count; duff_block = new_Block(2, ins); + DB((dbg, LEVEL_4, "Duff block 3 %N\n", duff_block)); - /* Matze: I commented this line out because I was in the process of - * removing the Abs node. I don't understand that line at all anyway - * since no other code here checks for the presence of an Abs or creates - * one. So how can we know here that "count" is an Abs node... */ -#if 0 - /* count wants to be positive */ - ins[0] = get_Abs_op(count); -#endif + /* Get absolute value */ + ins[0] = new_Abs(count, mode); /* Manually feed the aforementioned count = 1 (bad case)*/ ins[1] = new_Const(get_mode_one(mode)); count_phi = new_r_Phi(duff_block, 2, ins, mode); @@ -1698,8 +1819,10 @@ static void create_duffs_block(void) mode, op_pin_state_pinned); - proj = new_Proj(duff_mod, mode_X, pn_Mod_res); - cond = new_Cond(proj); + + proj = new_Proj(duff_mod, mode, pn_Mod_res); + /* condition does NOT create itself in the block of the proj! */ + cond = new_r_Cond(duff_block, proj); loop_info.duff_cond = cond; } @@ -1711,8 +1834,11 @@ static unsigned is_loop_invariant_def(ir_node *node) { int i; - if (! is_in_loop(node)) + if (! is_in_loop(node)) { + DB((dbg, LEVEL_4, "Not in loop %N\n", node)); + /* || is_Const(node) || is_SymConst(node)) {*/ return 1; + } /* If this is a phi of the loophead shared by more than 1 loop, * we need to check if all defs are not in the loop. */ @@ -1721,16 +1847,21 @@ static unsigned is_loop_invariant_def(ir_node *node) block = get_nodes_block(node); /* To prevent unexpected situations. */ - if (block != loop_head) + if (block != loop_head) { return 0; + } for (i = 0; i < get_irn_arity(node); ++i) { /* Check if all bes are just loopbacks. */ if (is_own_backedge(block, i) && get_irn_n(node, i) != node) return 0; } + DB((dbg, LEVEL_4, "invar %N\n", node)); + return 1; } - return 1; + DB((dbg, LEVEL_4, "Not invar %N\n", node)); + + return 0; } /* Returns 1 if one pred of node is invariant and the other is not. @@ -1744,20 +1875,29 @@ static unsigned get_invariant_pred(ir_node *node, ir_node **invar_pred, ir_node *other = NULL; if (is_loop_invariant_def(pred0)) { + DB((dbg, LEVEL_4, "pred0 invar %N\n", pred0)); *invar_pred = pred0; *other = pred1; } if (is_loop_invariant_def(pred1)) { - if (invar_pred != NULL) + DB((dbg, LEVEL_4, "pred1 invar %N\n", pred1)); + + if (*invar_pred != NULL) { /* RETURN. We do not want both preds to be invariant. */ return 0; + } *other = pred0; *invar_pred = pred1; return 1; } else { - return 0; + DB((dbg, LEVEL_4, "pred1 not invar %N\n", pred1)); + + if (*invar_pred != NULL) + return 1; + else + return 0; } } @@ -1829,7 +1969,6 @@ static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **ot /*DB((dbg, LEVEL_4, "is %N const\n", pred0));*/ if (is_Const(pred0) || is_SymConst(pred0)) { - DB((dbg, LEVEL_1, "%N is constant\n", pred0)); *const_pred = pred0; *other = pred1; } @@ -1837,12 +1976,10 @@ static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **ot /*DB((dbg, LEVEL_4, "is %N const\n", pred1));*/ if (is_Const(pred1) || is_SymConst(pred1)) { if (*const_pred != NULL) { - DB((dbg, LEVEL_1, "%N is ALSO constant\n", pred1)); /* RETURN. We do not want both preds to be constant. */ return 0; } - DB((dbg, LEVEL_4, "%N is constant\n", pred1)); *other = pred0; *const_pred = pred1; } @@ -1853,47 +1990,28 @@ static unsigned get_const_pred(ir_node *node, ir_node **const_pred, ir_node **ot return 1; } -/* Returns the mathematically inverted pn_Cmp. */ -static pn_Cmp get_math_inverted_case(pn_Cmp proj) -{ - switch(proj) { - case pn_Cmp_Eq: - return pn_Cmp_Lg; - case pn_Cmp_Lg: - return pn_Cmp_Eq; - case pn_Cmp_Lt: - return pn_Cmp_Ge; - case pn_Cmp_Le: - return pn_Cmp_Gt; - case pn_Cmp_Gt: - return pn_Cmp_Le; - case pn_Cmp_Ge: - return pn_Cmp_Lt; - default: - panic("Unhandled pn_Cmp."); - } -} - -/* norm_proj means we do not exit the loop. */ -static unsigned simulate_next(tarval **count_tar, - tarval *stepped, tarval *step_tar, tarval *end_tar, pn_Cmp norm_proj) +/* Returns 1 if loop exits within 2 steps of the iv. + * Norm_proj means we do not exit the loop.*/ +static unsigned simulate_next(ir_tarval **count_tar, + ir_tarval *stepped, ir_tarval *step_tar, ir_tarval *end_tar, + ir_relation norm_proj) { - tarval *next; + ir_tarval *next; - DB((dbg, LEVEL_1, "Loop taken if (stepped)%ld %s (end)%ld ", + DB((dbg, LEVEL_4, "Loop taken if (stepped)%ld %s (end)%ld ", get_tarval_long(stepped), - get_pnc_string((norm_proj)), + get_relation_string((norm_proj)), get_tarval_long(end_tar))); - DB((dbg, LEVEL_1, "comparing latest value %d\n", loop_info.latest_value)); + DB((dbg, LEVEL_4, "comparing latest value %d\n", loop_info.latest_value)); /* If current iv does not stay in the loop, * this run satisfied the exit condition. */ if (! (tarval_cmp(stepped, end_tar) & norm_proj)) return 1; - DB((dbg, LEVEL_1, "Result: (stepped)%ld IS %s (end)%ld\n", + DB((dbg, LEVEL_4, "Result: (stepped)%ld IS %s (end)%ld\n", get_tarval_long(stepped), - get_pnc_string(tarval_cmp(stepped, end_tar)), + get_relation_string(tarval_cmp(stepped, end_tar)), get_tarval_long(end_tar))); /* next step */ @@ -1903,11 +2021,11 @@ static unsigned simulate_next(tarval **count_tar, /* sub */ next = tarval_sub(stepped, step_tar, get_irn_mode(loop_info.end_val)); - DB((dbg, LEVEL_1, "Loop taken if %ld %s %ld ", + DB((dbg, LEVEL_4, "Loop taken if %ld %s %ld ", get_tarval_long(next), - get_pnc_string(norm_proj), + get_relation_string(norm_proj), get_tarval_long(end_tar))); - DB((dbg, LEVEL_1, "comparing latest value %d\n", loop_info.latest_value)); + DB((dbg, LEVEL_4, "comparing latest value %d\n", loop_info.latest_value)); /* Increase steps. */ *count_tar = tarval_add(*count_tar, get_tarval_one(get_tarval_mode(*count_tar))); @@ -1930,8 +2048,7 @@ static unsigned simulate_next(tarval **count_tar, static ir_node *is_simple_loop(void) { int arity, i; - unsigned loop_depth, max_loop_nodes_adapted; - ir_node *loop_block, *exit_block, *projx, *cond, *projres, *loop_condition; + ir_node *loop_block, *exit_block, *projx, *cond, *cmp; /* Maximum of one condition, and no endless loops. */ if (loop_info.cf_outs != 1) @@ -1939,25 +2056,21 @@ static ir_node *is_simple_loop(void) DB((dbg, LEVEL_4, "1 loop exit\n")); -#if 0 +#if LOOP_IGNORE_NODE_LIMITS /* Ignore loop size. Probably not wise in other than testcases. */ - (void) max_loop_nodes_adapted; - (void) loop_depth; - - loop_info.max_unroll = 6; + loop_info.max_unroll = 40; #else /* Calculate maximum unroll_nr keeping node count below limit. */ - loop_depth = get_loop_depth(cur_loop) - 1; - max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth); - - loop_info.max_unroll = opt_params.max_loop_size / loop_info.nodes; + loop_info.max_unroll = (int)((double)opt_params.max_unrolled_loop_size / (double)loop_info.nodes); if (loop_info.max_unroll < 2) { count_stats(stats.too_large); return NULL; } #endif + + DB((dbg, LEVEL_4, "maximum unroll factor %u, to not exceed node limit \n", - loop_info.max_unroll)); + opt_params.max_unrolled_loop_size)); arity = get_irn_arity(loop_head); /* RETURN if we have more than 1 be. */ @@ -1990,13 +2103,12 @@ static ir_node *is_simple_loop(void) /* find value on which loop exit depends */ projx = loop_info.cf_out.pred; cond = get_irn_n(projx, 0); - projres = get_irn_n(cond, 0); - loop_condition = get_irn_n(projres, 0); + cmp = get_irn_n(cond, 0); - if (!is_Cmp(loop_condition)) + if (!is_Cmp(cmp)) return NULL; - DB((dbg, LEVEL_5, "projection is %s\n", get_pnc_string(get_Proj_proj(projx)))); + DB((dbg, LEVEL_5, "projection is %s\n", get_relation_string(get_Proj_proj(projx)))); switch(get_Proj_proj(projx)) { case pn_Cond_false: @@ -2010,8 +2122,7 @@ static ir_node *is_simple_loop(void) } DB((dbg, LEVEL_4, "Valid Cmp.\n")); - - return projres; + return cmp; } /* Returns 1 if all nodes are mode_Iu or mode_Is. */ @@ -2033,19 +2144,27 @@ static unsigned are_mode_I(ir_node *n1, ir_node* n2, ir_node *n3) static unsigned get_unroll_decision_invariant(void) { - ir_node *projres, *loop_condition, *iteration_path; - unsigned success, is_latest_val; - tarval *start_tar, *step_tar; - ir_mode *mode; + ir_node *projres, *loop_condition, *iteration_path; + unsigned success, is_latest_val; + ir_tarval *step_tar; + ir_mode *mode; + /* RETURN if loop is not 'simple' */ projres = is_simple_loop(); if (projres == NULL) return 0; + /* Use a minimal size for the invariant unrolled loop, + * as duffs device produces overhead */ + if (loop_info.nodes < opt_params.invar_unrolling_min_size) + return 0; + loop_condition = get_irn_n(projres, 0); success = get_invariant_pred(loop_condition, &loop_info.end_val, &iteration_path); + DB((dbg, LEVEL_4, "pred invar %d\n", success)); + if (! success) return 0; @@ -2059,7 +2178,7 @@ static unsigned get_unroll_decision_invariant(void) is_latest_val = 1; loop_info.add = iteration_path; - DB((dbg, LEVEL_4, "Got add %N (maybe not sane)\n", loop_info.add)); + DB((dbg, LEVEL_4, "Case 1: Got add %N (maybe not sane)\n", loop_info.add)); /* Preds of the add should be step and the iteration_phi */ success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi); @@ -2079,7 +2198,7 @@ static unsigned get_unroll_decision_invariant(void) if (! success) return 0; - DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val)); + DB((dbg, LEVEL_4, "Got start A %N\n", loop_info.start_val)); } else if (is_Phi(iteration_path)) { ir_node *new_iteration_phi; @@ -2088,7 +2207,7 @@ static unsigned get_unroll_decision_invariant(void) is_latest_val = 0; loop_info.iteration_phi = iteration_path; - DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi)); + DB((dbg, LEVEL_4, "Case 2: Got phi %N\n", loop_info.iteration_phi)); /* Find start_val and add-node. * Does necessary sanity check of add, if it is already set. */ @@ -2096,14 +2215,14 @@ static unsigned get_unroll_decision_invariant(void) if (! success) return 0; - DB((dbg, LEVEL_4, "Got start %N\n", loop_info.start_val)); + DB((dbg, LEVEL_4, "Got start B %N\n", loop_info.start_val)); DB((dbg, LEVEL_4, "Got add or sub %N\n", loop_info.add)); success = get_const_pred(loop_info.add, &loop_info.step, &new_iteration_phi); if (! success) return 0; - DB((dbg, LEVEL_4, "Got step %N\n", loop_info.step)); + DB((dbg, LEVEL_4, "Got step (B) %N\n", loop_info.step)); if (loop_info.iteration_phi != new_iteration_phi) return 0; @@ -2127,7 +2246,6 @@ static unsigned get_unroll_decision_invariant(void) DB((dbg, LEVEL_4, "mode integer\n")); step_tar = get_Const_tarval(loop_info.step); - start_tar = get_Const_tarval(loop_info.start_val); if (tarval_is_null(step_tar)) { /* TODO Might be worth a warning. */ @@ -2143,9 +2261,9 @@ static unsigned get_unroll_decision_invariant(void) /* Returns unroll factor, * given maximum unroll factor and number of loop passes. */ -static unsigned get_preferred_factor_constant(tarval *count_tar) +static unsigned get_preferred_factor_constant(ir_tarval *count_tar) { - tarval *tar_6, *tar_5, *tar_4, *tar_3, *tar_2; + ir_tarval *tar_6, *tar_5, *tar_4, *tar_3, *tar_2; unsigned prefer; ir_mode *mode = get_irn_mode(loop_info.end_val); @@ -2227,19 +2345,22 @@ static unsigned get_preferred_factor_constant(tarval *count_tar) } /* Check if cur_loop is a simple counting loop. - * Start, step and end are constants. */ + * Start, step and end are constants. + * TODO The whole constant case should use procedures similar to + * the invariant case, as they are more versatile. */ /* TODO split. */ static unsigned get_unroll_decision_constant(void) { - ir_node *projres, *loop_condition, *iteration_path; - unsigned success, is_latest_val; - tarval *start_tar, *end_tar, *step_tar, *diff_tar, *count_tar, *stepped; - pn_Cmp proj_proj, norm_proj; - ir_mode *mode; + ir_node *cmp, *iteration_path; + unsigned success, is_latest_val; + ir_tarval *start_tar, *end_tar, *step_tar, *diff_tar, *count_tar; + ir_tarval *stepped; + ir_relation proj_proj, norm_proj; + ir_mode *mode; /* RETURN if loop is not 'simple' */ - projres = is_simple_loop(); - if (projres == NULL) + cmp = is_simple_loop(); + if (cmp == NULL) return 0; /* One in of the loop condition needs to be loop invariant. => end_val @@ -2257,9 +2378,7 @@ static unsigned get_unroll_decision_constant(void) /\ */ - loop_condition = get_irn_n(projres, 0); - - success = get_const_pred(loop_condition, &loop_info.end_val, &iteration_path); + success = get_const_pred(cmp, &loop_info.end_val, &iteration_path); if (! success) return 0; @@ -2273,7 +2392,7 @@ static unsigned get_unroll_decision_constant(void) is_latest_val = 1; loop_info.add = iteration_path; - DB((dbg, LEVEL_4, "Got add %N (maybe not sane)\n", loop_info.add)); + DB((dbg, LEVEL_4, "Case 2: Got add %N (maybe not sane)\n", loop_info.add)); /* Preds of the add should be step and the iteration_phi */ success = get_const_pred(loop_info.add, &loop_info.step, &loop_info.iteration_phi); @@ -2302,7 +2421,7 @@ static unsigned get_unroll_decision_constant(void) is_latest_val = 0; loop_info.iteration_phi = iteration_path; - DB((dbg, LEVEL_4, "Got phi %N\n", loop_info.iteration_phi)); + DB((dbg, LEVEL_4, "Case 1: Got phi %N \n", loop_info.iteration_phi)); /* Find start_val and add-node. * Does necessary sanity check of add, if it is already set. */ @@ -2365,7 +2484,7 @@ static unsigned get_unroll_decision_constant(void) /* Iv will not pass end_val (except overflows). * Nothing done, as it would yield to no advantage. */ if (tarval_is_negative(count_tar)) { - DB((dbg, LEVEL_1, "Loop is endless or never taken.")); + DB((dbg, LEVEL_4, "Loop is endless or never taken.")); /* TODO Might be worth a warning. */ return 0; } @@ -2391,17 +2510,16 @@ static unsigned get_unroll_decision_constant(void) DB((dbg, LEVEL_4, "stepped to %ld\n", get_tarval_long(stepped))); - proj_proj = get_Proj_proj(projres); + proj_proj = get_Cmp_relation(cmp); /* Assure that norm_proj is the stay-in-loop case. */ if (loop_info.exit_cond == 1) - norm_proj = get_math_inverted_case(proj_proj); + norm_proj = get_negated_relation(proj_proj); else norm_proj = proj_proj; - DB((dbg, LEVEL_4, "normalized projection %s\n", get_pnc_string(norm_proj))); - + DB((dbg, LEVEL_4, "normalized projection %s\n", get_relation_string(norm_proj))); /* Executed at most once (stay in counting loop if a Eq b) */ - if (norm_proj == pn_Cmp_Eq) + if (norm_proj == ir_relation_equal) /* TODO Might be worth a warning. */ return 0; @@ -2434,6 +2552,28 @@ static unsigned get_unroll_decision_constant(void) */ static void unroll_loop(void) { + + if (! (loop_info.nodes > 0)) + return; + +#if LOOP_IGNORE_NODE_LIMITS + DB((dbg, LEVEL_1, "WARNING: Loop node limitations ignored.")); +#else + if (loop_info.nodes > opt_params.max_unrolled_loop_size) { + DB((dbg, LEVEL_2, "Nodes %d > allowed nodes %d\n", + loop_info.nodes, opt_params.max_unrolled_loop_size)); + count_stats(stats.too_large); + return; + } + + if (loop_info.calls > 0) { + DB((dbg, LEVEL_2, "Calls %d > allowed calls 0\n", + loop_info.calls)); + count_stats(stats.calls_limit); + return; + } +#endif + unroll_nr = 0; /* get_unroll_decision_constant and invariant are completely @@ -2454,7 +2594,7 @@ static void unroll_loop(void) loop_info.unroll_kind = invariant; } - DB((dbg, LEVEL_1, " *** Unrolling %d times ***\n", unroll_nr)); + DB((dbg, LEVEL_2, " *** Unrolling %d times ***\n", unroll_nr)); if (unroll_nr > 1) { loop_entries = NEW_ARR_F(entry_edge, 0); @@ -2462,10 +2602,14 @@ static void unroll_loop(void) /* Get loop outs */ irg_walk_graph(current_ir_graph, get_loop_entries, NULL, NULL); - if ((int)get_tarval_long(loop_info.count_tar) == unroll_nr) - loop_info.needs_backedge = 0; - else + if (loop_info.unroll_kind == constant) { + if ((int)get_tarval_long(loop_info.count_tar) == unroll_nr) + loop_info.needs_backedge = 0; + else + loop_info.needs_backedge = 1; + } else { loop_info.needs_backedge = 1; + } /* Use phase to keep copy of nodes from the condition chain. */ phase = new_phase(current_ir_graph, phase_irn_init_default); @@ -2476,11 +2620,12 @@ static void unroll_loop(void) /* Line up the floating copies. */ place_copies(unroll_nr - 1); - /* Remove phis with 1 in*/ + /* Remove phis with 1 in + * If there were no nested phis, this would not be necessary. + * Avoiding the creation in the first place + * leads to complex special cases. */ irg_walk_graph(current_ir_graph, correct_phis, NULL, NULL); - /* dump_ir_block_graph(current_ir_graph, "-DONE"); */ - if (loop_info.unroll_kind == constant) count_stats(stats.constant_unroll); else @@ -2500,14 +2645,6 @@ static void unroll_loop(void) * Decides if loop will be processed. */ static void init_analyze(ir_loop *loop) { - /* Expect no benefit of big loops. */ - /* TODO tuning/make parameter */ - int loop_depth; - unsigned max_loop_nodes = opt_params.max_loop_size; - unsigned max_loop_nodes_adapted; - int max_calls = opt_params.allowed_calls; - int depth_adaption = opt_params.depth_adaption; - cur_loop = loop; loop_head = NULL; @@ -2522,46 +2659,6 @@ static void init_analyze(ir_loop *loop) /* Collect loop informations: head, node counts. */ irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL); - /* Depth of 0 is the procedure and 1 a topmost loop. */ - loop_depth = get_loop_depth(loop) - 1; - - /* Calculating in per mil. */ - max_loop_nodes_adapted = get_max_nodes_adapted(loop_depth); - - DB((dbg, LEVEL_1, "max_nodes: %d\nmax_nodes_adapted %d at depth of %d (adaption %d)\n", - max_loop_nodes, max_loop_nodes_adapted, loop_depth, depth_adaption)); - - if (! (loop_info.nodes > 0)) - return; - -#if LOOP_IGNORE_NODE_LIMITS - DB((dbg, LEVEL_1, "WARNING: Loop node limitations ignored.")); -#else - if (loop_info.nodes > max_loop_nodes) { - /* Only for stats */ - DB((dbg, LEVEL_1, "Nodes %d > allowed nodes %d\n", - loop_info.nodes, loop_depth, max_loop_nodes)); - count_stats(stats.too_large); - /* no RETURN */ - /* Adaption might change it */ - } - - /* Limit processing to loops smaller than given parameter. */ - if (loop_info.nodes > max_loop_nodes_adapted) { - DB((dbg, LEVEL_1, "Nodes %d > allowed nodes (depth %d adapted) %d\n", - loop_info.nodes, loop_depth, max_loop_nodes_adapted)); - count_stats(stats.too_large_adapted); - return; - } - - if (loop_info.calls > opt_params.allowed_calls) { - DB((dbg, LEVEL_1, "Calls %d > allowed calls %d\n", - loop_info.calls, max_calls)); - count_stats(stats.calls_limit); - return; - } -#endif - /* RETURN if there is no valid head */ if (!loop_head || !loop_head_valid) { DB((dbg, LEVEL_1, "No valid loop head. Nothing done.\n")); @@ -2570,6 +2667,13 @@ static void init_analyze(ir_loop *loop) DB((dbg, LEVEL_1, "Loophead: %N\n", loop_head)); } + if (loop_info.branches > opt_params.max_branches) { + DB((dbg, LEVEL_1, "Branches %d > allowed branches %d\n", + loop_info.branches, opt_params.max_branches)); + count_stats(stats.calls_limit); + return; + } + switch (loop_op) { case loop_op_inversion: loop_inversion(); @@ -2590,36 +2694,45 @@ static void init_analyze(ir_loop *loop) static void find_innermost_loop(ir_loop *loop) { /* descend into sons */ - int sons = get_loop_n_sons(loop); + size_t sons = get_loop_n_sons(loop); if (sons == 0) { ARR_APP1(ir_loop *, loops, loop); } else { - int s; - for (s=0; s>> inversion done (Startnode %N) <<<\n", get_irg_start(irg))); }