X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Floop.c;h=7aceb2dc2423e19805d91f03513b69cb2eac03ec;hb=e0e9e9ace61d3ec46e4d09c7ab2c6947b17b2778;hp=3f95d9c188c287111dc94b7d33913c2ace55e890;hpb=b0196335c375cbed13e8e1fea996a7651885db18;p=libfirm diff --git a/ir/opt/loop.c b/ir/opt/loop.c index 3f95d9c18..7aceb2dc2 100644 --- a/ir/opt/loop.c +++ b/ir/opt/loop.c @@ -19,364 +19,308 @@ /** * @file - * @brief Loop peeling and unrolling * @author Christian Helmer + * @brief loop inversion and loop unrolling, loop peeling + * * @version $Id$ */ - -//#include "config.h" - -//#include -#include +#include "config.h" #include "irnode.h" -#include "irnode_t.h" -#include "irgraph_t.h" -//#include "irprog_t.h" +#include "debug.h" -//#include "iroptimize.h" -#include "ircons_t.h" -#include "iropt_t.h" +#include "ircons.h" #include "irgopt.h" -//#include "irgmod.h" +#include "irgmod.h" #include "irgwalk.h" - -//#include "array_t.h" -#include "list.h" -//#include "pset.h" -//#include "pmap.h" -//#include "pdeq.h" -//#include "xmalloc.h" -//#include "pqueue.h" - #include "irouts.h" -#include "irloop_t.h" -#include "irbackedge_t.h" -//#include "opt_inline_t.h" -//#include "cgana.h" -//#include "trouts.h" -//#include "error.h" - -//#include "analyze_irg_args.h" -#include "iredges_t.h" -//#include "irflag_t.h" -//#include "irhooks.h" +#include "iredges.h" #include "irtools.h" -//#include "iropt_dbg.h" -#include "irpass_t.h" -#include "irloop.h" +#include "array_t.h" /* automatic array */ +#include "beutil.h" /* get_block */ +#include "irloop_t.h" /* set_irn_loop*/ + +#if 0 + #include "irdump_t.h" +#endif -#include "array_t.h" -#include "irdump.h" -/* convenience macro for iterating over every phi node of the given block */ +DEBUG_ONLY(static firm_dbg_module_t *dbg); + +/** + * Convenience macro for iterating over every phi node of the given block. + * Requires phi list per block. + */ #define for_each_phi(block, phi) \ for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) ) /* current loop */ -ir_loop *cur_loop; +static ir_loop *cur_loop; -/* The loop walker should be possible to abort if nothing can be done anymore */ +/* abortable walker function */ typedef unsigned irg_walk_func_abortable(ir_node *, void *); -/* stores pair of node and number for the nodes predecessor */ -typedef struct loop_entry_t { - ir_node *node; /* node outside of the loop */ - int pred_irn_n; /* with pred_irn_n pointing inside loop */ - //loop_entry_t *next; -} loop_entry_t; +/* condition for walking a node during a copy_walk */ +typedef unsigned walker_condition(ir_node *); -//loop_entry_t loop_entry_list; +/* node and position of a predecessor */ +typedef struct out_edges { + ir_node *node; + int pred_irn_n; +} out_edge; -/* Store complex values in the nodes link */ -typedef struct link_node_state_t { - unsigned cloned:1; - unsigned temp:1; /* < Node is temporarily copied, to resolve cycles */ +/* access complex values through the nodes links */ +typedef struct node_info { unsigned invariant:1; ir_node *copy; - ir_node *link; /*< temporary links for ssa creation */ - ir_node **ins; /* ins for phi nodes, during rewiring of blocks */ -} link_node_state_t; - - -loop_entry_t *loop_entries; /* loop entries (from below) in the node graph */ -//int loop_entries_n; -loop_entry_t *head_entries; /* loop entries (from below) in the node graph */ -int backedges_n; -//loop_entry_t *backedges; /* backedges exclusively from the current loop */ -//loop_entry_t *alien_backedges; /* The head can be head of several loops. */ -//loop_entry_t *head_edges; /* The head can be head of several loops. */ - -ir_node *loop_cf_head = NULL; /* loop exit in the node graph */ -unsigned loop_cf_head_valid = 1; /* a loop may/must have one head, otherwise invalid */ - -unsigned has_sto; /* If we store inside the loop we might - * have disambiguation problems */ -//DBG -//void arrdump(ir_node **arr) -//{ -// int i; -// for (i=0; inode_nr), is_Block(arr[i])); -// } -//} + ir_node *link; /* temporary links for ssa creation */ + ir_node **ins; /* ins for phi nodes, during rewiring of blocks */ + unsigned done; + struct node_info *freelistnext; /* linked list to free all node_infos */ +} node_info; + +static node_info *link_node_state_list; /* head of the linked list to free all node_infos */ + +static out_edge *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */ +static out_edge *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */ + +static ir_node *loop_cf_head = NULL; /* Loop head node */ +static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */ + +ir_node **loops; + +/* Inverted head */ +static ir_node *loop_inv_head = NULL; +/* Peeled head */ +static ir_node *loop_peeled_head = NULL; + +/* Loop analysis informations */ +typedef struct loop_info_t { + unsigned blocks; /* number of blocks in the loop */ + unsigned calls; /* number of calls */ + unsigned loads; /* number of load nodes */ + unsigned outs; /* outs without keepalives */ +#if 0 + unsigned invariant_loads; + unsigned stores; /* number of store nodes */ + unsigned opnodes_n; /* nodes that probably result in an instruction */ + unsigned do_invariant_opt; +#endif +} loop_info_t; + +/* Information about the current loop */ +static loop_info_t loop_info; + +/* A walker may start visiting a condition chain with these nodes. */ +static out_edge *cond_chain_entries; + +/* Number of unrolling */ +int unroll_times; + +static unsigned head_inversion_node_count; +static unsigned inversion_head_node_limit; +static unsigned head_inversion_block_count; + +static unsigned enable_peeling; +static unsigned enable_inversion; +static unsigned enable_unrolling; /** - * Returns the state of the given node. + * + * ============= AUXILIARY FUNCTIONS ===================================== */ -link_node_state_t *get_lstate(ir_node *n) -{ - return ((link_node_state_t *)n->link); -} + /** - * Returns the link inside of the nodes state which is pointing to its copy - * most of the time during loop peeling. + * Creates object on the heap, and adds it to a linked list to free it later. */ -ir_node *get_copy(ir_node *n) -{ - return ((link_node_state_t *)n->link)->copy; +static node_info *new_node_info(void) { + node_info *l = XMALLOCZ(node_info); + l->freelistnext = link_node_state_list; + link_node_state_list = l; + l->copy = NULL; + l->invariant = 0; + return l; } -/** - * Sets the nodes copy information - */ -void set_copy(ir_node *n, ir_node *copy) +static node_info *get_node_info(ir_node *n) { - ((link_node_state_t *)n->link)->copy = copy; + return ((node_info *)get_irn_link(n)); } -/** - * Returns true if the node or block is in cur_loop. - */ -unsigned is_in_loop(ir_node *node) +/* Allocates a node_info struct for the given node. For use with a walker. */ +static void alloc_node_info(ir_node *node, void *env) { -// if (is_Block(node)) { -// if (node->loop == cur_loop) { -// printf(" INLOOP %ld \n", node->node_nr); -// } -// return (node->loop == cur_loop); -// } else { -// if ( get_nodes_block(node)->loop == cur_loop ) { -// printf(" INLOOP %ld \n", node->node_nr); -// } -// return ( get_nodes_block(node)->loop == cur_loop ); -// } - if (is_Block(node)) { - return (node->loop == cur_loop); - } else { - return ( get_nodes_block(node)->loop == cur_loop ); - } + node_info *state; + (void) env; + state = new_node_info(); + set_irn_link(node, (void *)state); } -unsigned is_in_head(ir_node *node) +static void free_node_info(void) { - if (is_Block(node)) { - return (node == loop_cf_head); - } else { - return ( get_nodes_block(node) == loop_cf_head ); + int a = 0; + node_info *n; + n = link_node_state_list; + while(n) { + node_info *next = n->freelistnext; + ++a; + xfree(n); + n = next; } + link_node_state_list = NULL; } /** - * Returns if the given be is an alien edge + * Use the linked list to reset the reused values of all node_infos + * Reset in particular the copy attribute as copy_walk uses it to determine a present copy */ -unsigned is_alien_edge(ir_node *n, int i) +static void reset_node_infos(void) { - return( !is_in_loop( get_irn_n( n, i ) ) ); + node_info *next; + next = link_node_state_list; + while(next->freelistnext) { + node_info *cur = next; + next = cur->freelistnext; + cur->copy = NULL; + cur->ins = NULL; + cur->link = NULL; + } } -static void add_pred(ir_node* node, ir_node* x) +/* Returns the nodes node_info link. */ +static ir_node *get_link(ir_node *n) { - ir_node** ins; - int n; - int i; - -// if(!node) -// printf("NONODE\n"); - - //printf("addpred %ld pred %ld \n", node->node_nr, x->node_nr); - - // WHY limit it to blocks and phi? - assert(is_Block(node) || is_Phi(node)); - - n = get_irn_arity(node); - NEW_ARR_A(ir_node*, ins, n + 1); - for (i = 0; i < n; i++) - ins[i] = get_irn_n(node, i); - ins[n] = x; - set_irn_in(node, n + 1, ins); + return ((node_info *)get_irn_link(n))->link; } -void block_phi_walker(ir_node *n, void *env) +/* Sets the nodes node_info link. */ +static void set_link(ir_node *n, ir_node *link) { - const ir_edge_t *edge; - (void) env; - - /* RETURN */ - if (!is_Block(n)) - return; + ((node_info *)get_irn_link(n))->link = link; +} - /* generate phi list for every block */ - n->attr.block.phis = NULL; +/* Returns a nodes copy. */ +static ir_node *get_copy(ir_node *n) +{ + return ((node_info *)get_irn_link(n))->copy; +} - foreach_out_edge(n, edge) { - ir_node *src = get_edge_src_irn(edge); - if (is_Phi(src)) - { - //printf("%ld has phi %ld \n", block->node_nr, src->node_nr); - add_Block_phi(n, src); - } - } +/* Sets a nodes copy. */ +static void set_copy(ir_node *n, ir_node *copy) +{ + ((node_info *)get_irn_link(n) )->copy = copy; } /** - * Calls func() for every block in the given loop. + * Convenience macro for iterating over every copy in a linked list + * of copies. */ -void for_each_loop_block(ir_loop *loop, irg_walk_func *func, void *env) -{ - int elements, e; - elements = get_loop_n_elements(loop); - - for(e=0; enode_nr); - func(elem.node, env); - } - } -} +#define for_each_copy(node) \ + for ( ; (node) ; (node) = get_copy(node)) /** - * collects the blocks backedges and creates the phi list for every block + * Convenience macro for iterating over every copy in 2 linked lists + * of copies in parallel. */ -void collect_backedges(ir_node *block, void *env) -{ - (void) env; +#define for_each_copy2(high1, low1, high2, low2) \ + for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \ + (high2) = (low2), (low2) = get_copy(low2)) - //printf("LOOP BLOCK %ld\n", block->node_nr); +/* + * Returns 0 if the node or block is not in cur_loop. + */ +static unsigned is_in_loop(ir_node *node) +{ + return (get_irn_loop(get_block(node)) == cur_loop); +} - /* collect backedges */ - if (has_backedges(block)) - { - int i; - int arity = get_irn_arity(block); +/* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */ +static unsigned is_alien_edge(ir_node *n, int i) +{ + return(!is_in_loop(get_irn_n(n, i))); +} - for(i = 0; i < arity; ++i) { - ir_node *pred = get_irn_n(block, i); +/* used for block walker */ +static void reset_block_mark(ir_node *node, void * env) +{ + (void) env; - loop_entry_t be; - be.node = block; - be.pred_irn_n = i; + if (is_Block(node)) + set_Block_mark(node, 0); +} - //ARR_APP1(loop_entry_t, head_edges, be); +static unsigned is_nodesblock_marked(ir_node* node) +{ + return (get_Block_mark(get_block(node))); +} - if (is_backedge(block, i) ) - { - if ( is_in_loop(pred) ) { - //printf("be: %ld --> %ld \n", block->node_nr, pred->node_nr); - //ARR_APP1(loop_entry_t, backedges, be); - ++backedges_n; - } -// else { -// //printf("alien be: %ld --> %ld \n", block->node_nr, pred->node_nr); -// ARR_APP1(loop_entry_t, alien_backedges, be); -// } - } -// else { -// if ( !is_in_loop(pred) ) { -// ARR_APP1(loop_entry_t, head_edges, be); -// } +/* Returns the number of blocks in a loop. */ +int get_loop_n_blocks(ir_loop *loop) { + int elements, e; + int blocks = 0; + elements = get_loop_n_elements(loop); - } + for(e=0; evisited); + block_arity = get_irn_arity(node); - if (node->op != op_Block) { - ir_node *pred = get_irn_n(node, -1); - if (pred->visited < irg->visited) - { - stop = loop_walker_rec(pred, pre, post, env); - if (stop) - return stop; - } - } + NEW_ARR_A(ir_node*, ins, block_arity + 1); - for (i = get_irn_arity(node) - 1; i >= 0; --i) { - ir_node *pred = get_irn_n(node, i); - if (pred->visited < irg->visited) - { - stop = loop_walker_rec(pred, pre, post, env); - if (stop) - return stop; - } + for (i = 0; i < block_arity; ++i) { + ins[i] = get_irn_n(node, i); } + ins[block_arity] = newpred; - if (post) - return post(node, env); - return 0; -} + set_irn_in(node, block_arity + 1, ins); -/** - * Walks through loop nodes. - * The entries of the loop (all edges pointing into the loop) have to be given. - */ -unsigned loop_walker(loop_entry_t *entries, - irg_walk_func_abortable *pre, irg_walk_func_abortable *post, void * env) -{ - int i; - int stop = 0; + for_each_phi(node, phi) { + int phi_arity = get_irn_arity(phi); + DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi))); - for (i=0; !stop && i inloop %ld (@ %d) \n", -// node->node_nr, pred->node_nr, i); + pred_in_loop = is_in_loop(pred); + node_in_loop = is_in_loop(node); - ARR_APP1(loop_entry_t, loop_entries, entry); + if (pred_in_loop && !node_in_loop) { + out_edge entry; + entry.node = node; + entry.pred_irn_n = i; + ARR_APP1(out_edge, cur_loop_outs, entry); } } } -///** -// * Finds invariant nodes and marks them as invariant. -// * (Post walk) -// */ -//unsigned get_invariants(ir_node *node, void *env) -//{ -// unsigned invar = 1; -// (void) env; -// -// if (is_Store(node)) -// { -// has_sto = 1; -// /* RETURN and abort walker */ -// return 1; -// } -// -// int arity = get_irn_arity(node); -// -// /* RETURN, no preds to visit */ -// if (arity == 0) return 0; -// -// if (is_Load(node)) -// { -// assert(arity>=2 && "expected load to have edge nr 1 (address)"); -// -// ir_node *pred = get_irn_n(node, 1); -// if (!is_in_loop(pred) /* Everything outside the loop is considered invariant */ -// || is_Const(pred) /* This is not true, but we also want the quasi-invariants. */ -// || is_SymConst(pred) -// || get_lstate(node)->invariant) -// { -// //printf("## CONSTLOAD: %ld \n", node->node_nr); -// get_lstate(node)->invariant = 1; -// } else -// { -// get_lstate(node)->invariant = 0; -// } -// } -// else -// { -// int i; -// invar = 1; -// /* find loop variant preds */ -// for(i = 0; i < arity; ++i) -// { -// ir_node *pred = get_irn_n(node, i); -// -// if ( !(!is_in_loop(pred) /* outside loop is loop invariant */ -// || is_Const(pred) /* constants */ -// || is_SymConst(pred) /* SymConst, if no Store */ -// || get_lstate(node)->invariant /* pred is marked as invariant */ -// ) ) -// { -// invar = 0; -// } -// } -// -// if (invar) { -// printf("const: %ld \n", node->node_nr); -// get_lstate(node)->invariant = 1; -// } else { -// get_lstate(node)->invariant = 0; -// } -//// DBG -//// if (!is_nodes_block_marked(pred)) { -//// //printf("pred outloop: %ld, pred %ld (const)\n", node->node_nr, pred->node_nr); -//// } else if (is_Const(pred) || is_SymConst(pred)) // || is_Phi(pred)) { -//// //printf("predconst: %ld, pred %ld CONST\n", node->node_nr, pred->node_nr); -//// } else if (pred->link == MARKED_CONST) { -//// //printf("predmarked: %ld, pred %ld const\n", node->node_nr, pred->node_nr); -//// } else { -//// mark=0; -//// } -// } -// return 0; -//} - -////TODO DBG Remove -//void phifix(ir_node *node, ir_node *newpred) -//{ -// ir_node *phi=get_Block_phis(node); -// while(phi) -// { -// int pa = get_irn_arity(phi); -// int ba = get_irn_arity(node); -// -// -// -// while(ba>pa) -// { -// printf("!!!!!!!!!! block has %d, phi had %d\n", ba, pa ); -// add_pred(phi, newpred); -// pa++; -// printf("!!!!!!!!!! block has %d, phi has now %d\n", ba, pa ); -// } -// phi=get_Phi_next(phi); -// } -//} - static ir_node *ssa_second_def; static ir_node *ssa_second_def_block; /** - * + * Walks the graph bottom up, searching for definitions and creates phis. */ -static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, - int first) +static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode) { int i; int n_cfgpreds; @@ -535,22 +388,22 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, ir_node *phi; ir_node **in; - /* This is needed because we create bads sometimes */ - if (is_Bad(block)) + DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %ld\n", get_irn_node_nr(block))); + + /* Prevents creation of phi that would be bad anyway. + * Dead and bad blocks. */ + if (get_irn_arity(block) < 1 || is_Bad(block)) return new_Bad(); - /* the other defs can't be marked for cases where a user of the original - * value is in the same block as the alternative definition. - * In this case we mustn't use the alternative definition. - * So we keep a flag that indicated wether we walked at least 1 block - * away and may use the alternative definition */ - if (block == ssa_second_def_block && !first) { + if (block == ssa_second_def_block) { + DB((dbg, LEVEL_5, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def))); return ssa_second_def; } /* already processed this block? */ if (irn_visited(block)) { - ir_node *value = get_lstate(block)->link; + ir_node *value = get_link(block); + DB((dbg, LEVEL_5, "ssa already visited: use linked %ld\n", get_irn_node_nr(value))); return value; } @@ -561,11 +414,14 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, n_cfgpreds = get_Block_n_cfgpreds(block); if (n_cfgpreds == 1) { ir_node *pred_block = get_Block_cfgpred_block(block, 0); - ir_node *value = search_def_and_create_phis(pred_block, mode, 0); + ir_node *value; + + DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block))); - get_lstate(block)->link = value; - //set_irn_link(block, value); + value = search_def_and_create_phis(pred_block, mode); + set_link(block, value); mark_irn_visited(block); + return value; } @@ -575,15 +431,26 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, in[i] = new_Unknown(mode); phi = new_r_Phi(block, n_cfgpreds, in, mode); - //set_irn_link(block, phi); - get_lstate(block)->link = phi; + + /* Important: always keep block phi list up to date. */ + add_Block_phi(block, phi); + /* EVERY node is assumed to have a node_info linked. */ + alloc_node_info(phi, NULL); + + DB((dbg, LEVEL_5, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block))); + + set_link(block, phi); mark_irn_visited(block); /* set Phi predecessors */ for(i = 0; i < n_cfgpreds; ++i) { + ir_node *pred_val; ir_node *pred_block = get_Block_cfgpred_block(block, i); - ir_node *pred_val = search_def_and_create_phis(pred_block, mode, 0); + assert(pred_block != NULL); + pred_val = search_def_and_create_phis(pred_block, mode); + assert(pred_val != NULL); + DB((dbg, LEVEL_5, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val))); set_irn_n(phi, i, pred_val); } return phi; @@ -592,7 +459,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, /** * Given a set of values this function constructs SSA-form for the users of the * first value (the users are determined through the out-edges of the value). - * Uses the irn_visited flags. Works without using the dominance tree. + * Works without using the dominance tree. */ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, ir_node *second_block, ir_node *second_val) @@ -602,16 +469,20 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, const ir_edge_t *edge; const ir_edge_t *next; + assert(orig_block && orig_val && second_block && second_val && + "no parameter of construct_ssa may be NULL"); + /* no need to do anything */ if (orig_val == second_val) return; irg = get_irn_irg(orig_val); + + ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED); inc_irg_visited(irg); mode = get_irn_mode(orig_val); - get_lstate(orig_block)->link = orig_val; - //set_irn_link(orig_block, orig_val); + set_link(orig_block, orig_val); mark_irn_visited(orig_block); ssa_second_def_block = second_block; @@ -628,604 +499,1089 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, if (is_End(user)) continue; - //DB((dbg, LEVEL_3, ">>> Fixing user %+F (pred %d == %+F)\n", user, j, get_irn_n(user, j))); + DB((dbg, LEVEL_5, "original user %ld\n", get_irn_node_nr(user))); if (is_Phi(user)) { ir_node *pred_block = get_Block_cfgpred_block(user_block, j); - newval = search_def_and_create_phis(pred_block, mode, 1); + newval = search_def_and_create_phis(pred_block, mode); } else { - newval = search_def_and_create_phis(user_block, mode, 1); + newval = search_def_and_create_phis(user_block, mode); } - /* don't fix newly created Phis from the SSA construction */ - if (newval != user) { - //DB((dbg, LEVEL_4, ">>>> Setting input %d of %+F to %+F\n", j, user, newval)); + /* If we get a bad node the user keeps the original in. No second definition needed. */ + if (newval != user && !is_Bad(newval)) set_irn_n(user, j, newval); + } + + ir_free_resources(irg, IR_RESOURCE_IRN_VISITED); +} + +/* + * Construct SSA for def and all of its copies. + */ +static void construct_ssa_n(ir_node *def, ir_node *user) +{ + ir_graph *irg; + ir_mode *mode; + ir_node *iter = def; + const ir_edge_t *edge; + const ir_edge_t *next; + irg = get_irn_irg(def); + + ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED); + inc_irg_visited(irg); + + mode = get_irn_mode(def); + + for_each_copy(iter) { + set_link(get_nodes_block(iter), iter); + mark_irn_visited(get_nodes_block(iter)); + + DB((dbg, LEVEL_5, "ssa_n: Link def %ld to block %ld\n", + get_irn_node_nr(iter), get_irn_node_nr(get_nodes_block(iter)))); + } + + /* Need to search the outs, because we need the in-pos on the user node. */ + foreach_out_edge_safe(def, edge, next) { + ir_node *edge_user = get_edge_src_irn(edge); + int edge_src = get_edge_src_pos(edge); + ir_node *user_block = get_nodes_block(user); + ir_node *newval; + + if (edge_user != user) + continue; + + if (is_Phi(user)) { + ir_node *pred_block = get_Block_cfgpred_block(user_block, edge_src); + newval = search_def_and_create_phis(pred_block, mode); + } else { + newval = search_def_and_create_phis(user_block, mode); } + + if (newval != user && !is_Bad(newval)) + set_irn_n(user, edge_src, newval); } + + ir_free_resources(irg, IR_RESOURCE_IRN_VISITED); } +/** + * Construct SSA for all definitions in arr. + */ +void construct_ssa_foreach(ir_node **arr, int arr_n) +{ + int i; + for(i = 0; i < arr_n; ++i) { + ir_node *cppred, *block, *cpblock, *pred; + + pred = arr[i]; + cppred = get_copy(pred); + block = get_nodes_block(pred); + cpblock = get_nodes_block(cppred); + construct_ssa(block, pred, cpblock, cppred); + } +} +/* get the number of backedges without alien bes */ +static int get_backedge_n(ir_node *loophead, unsigned with_alien) +{ + int i; + int be_n = 0; + int arity = get_irn_arity(loophead); + for (i = 0; i < arity; ++i) { + ir_node *pred = get_irn_n(loophead, i); + if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred))) + ++be_n; + } + return be_n; +} /** - * Rewires the heads after peeling. This results in a tail-controlled loop. + * Rewires the heads after peeling. */ -void fix_head(ir_node *loophead) +static void peel_fix_heads(void) { + ir_node **loopheadnins, **peelheadnins; + ir_node *loophead = loop_cf_head; + ir_node *peelhead = get_copy(loophead); + int headarity = get_irn_arity(loophead); - int i; - ir_node **loopheadnins; - ir_node **peelheadnins; ir_node *phi; - ir_node *peelhead = get_copy(loophead); + int i; + int lheadin_c = 0; int pheadin_c = 0; - /** - * the loopheads new preds are: - * its own backedge(s) and the former backedge(s) of the peeled code - */ - int lhead_arity = 2 * backedges_n; //ARR_LEN(backedges); - int phead_arity = headarity - backedges_n; //ARR_LEN(backedges); + int backedges_n = get_backedge_n(loophead, 0); - /** We assume the worst case, in which every head entry - * origins from the same node. +1 for a null terminated list. - */ - //int tchead_arity = ARR_LEN(head_entries) + ( headarity - backedges_n) + 1 ; + int lhead_arity = 2 * backedges_n; + int phead_arity = headarity - backedges_n; + /* new in arrays */ NEW_ARR_A(ir_node *, loopheadnins, lhead_arity ); NEW_ARR_A(ir_node *, peelheadnins, phead_arity ); - phi = get_Block_phis(loophead); - while(phi) { - NEW_ARR_A(ir_node *, get_lstate(phi)->ins, lhead_arity); - phi=get_Phi_next(phi); + for_each_phi(loophead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity); } - - phi = get_Block_phis(peelhead); - while(phi) - { - NEW_ARR_A(ir_node *, get_lstate(phi)->ins, phead_arity); - phi=get_Phi_next(phi); + for_each_phi(peelhead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity); } for (i = 0; i < headarity; i++) { - ir_node *phi; ir_node *orgjmp = get_irn_n(loophead, i); ir_node *copyjmp = get_copy(orgjmp); /** * Rewire the head blocks ins and their phi ins. - * Requires blocks phi list. - * - * 1. Alien bes origin from the peeled head (new head of the whole loop) - * 2. Loops own bes must be kept/copied to the loophead. - * 3. All other edges origin from the peeled head (new head of the loop) + * Requires phi list per block. */ + if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) { + loopheadnins[lheadin_c] = orgjmp; + for_each_phi(loophead, phi) { + get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ; + } + ++lheadin_c; + /* former bes of the peeled code origin now from the loophead */ + loopheadnins[lheadin_c] = copyjmp; - //printf("head i %d\n", i); - - if (is_backedge(loophead, i)) - { - if (is_alien_edge(loophead, i)) { - peelheadnins[pheadin_c] = orgjmp; /* alien bes go to the peeled head */ - //set_backedge(peelhead, pheadin_c); - - // alien bes origin at the peeled head - for_each_phi(peelhead, phi) - { - //printf("alienbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr); - get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i); - } - //printf("alienbe %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr); - ++pheadin_c; - } else { - loopheadnins[lheadin_c] = orgjmp; /* keep/copy the loops own bes */ - //set_backedge(loophead, lheadin_c); - - for_each_phi(loophead, phi) { - //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr); - get_lstate( phi )->ins[lheadin_c] = get_irn_n(phi, i); - } - //printf("normalbe %ld @ %d -> add to loophead orgjump %ld\n", loophead->node_nr, i, orgjmp->node_nr); - ++lheadin_c; - - loopheadnins[lheadin_c] = copyjmp; /* former bes of the peeled code origin now from the loophead */ - //set_not_backedge(loophead, lheadin_c); - - /* get_irn_n( get_copy_of(phi), i) get_copy_of(get_irn_n( phi, i)) - * Order is crucial! Preds outside of the loop are non existent, like Const. - */ - for_each_phi(loophead, phi) { - //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( get_copy_of(phi), i)->node_nr); - get_lstate( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ; - } - //printf("normalbe %ld @ %d -> add to loophead copyjump %ld\n", loophead->node_nr, i, copyjmp->node_nr); - ++lheadin_c; + /* get_irn_n( get_copy_of(phi), i ) get_copy_of( get_irn_n( phi, i) ) + * Order is crucial! Predecessors outside of the loop are non existent. + * The copy (cloned with its ins!) has pred i, + * but phis pred i might not have a copy of itself. + */ + for_each_phi(loophead, phi) { + get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ; } + ++lheadin_c; } else { peelheadnins[pheadin_c] = orgjmp; - //set_not_backedge(peelhead, pheadin_c); - for_each_phi(peelhead, phi) { - //printf("edge phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( phi, i)->node_nr); - get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i); + get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i); } - //printf("edge %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr); ++pheadin_c; } }/* for */ -// printf("pheadin %d arr %d lheadin %d arr %d \n", -// pheadin_c, ARR_LEN(peelheadnins), -// lheadin_c, ARR_LEN(loopheadnins)); - assert(pheadin_c == ARR_LEN(peelheadnins) && lheadin_c == ARR_LEN(loopheadnins) && - "the number of head elements does not match the predefined one"); + "the constructed head arities do not match the predefined arities"); + /* assign the ins to the nodes */ set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins); set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins); for_each_phi(loophead, phi) { - ir_node **ins = get_lstate( phi )->ins; + ir_node **ins = get_node_info( phi )->ins; set_irn_in(phi, lhead_arity, ins); } for_each_phi(peelhead, phi) { - ir_node **ins = get_lstate( phi )->ins; + ir_node **ins = get_node_info( phi )->ins; set_irn_in(phi, phead_arity, ins); } } -ir_node *rawcopy_node(ir_node *node) +/** + * Create a raw copy (ins are still the old ones) of the given node. + * We rely on copies to be NOT visited. + */ +static ir_node *rawcopy_node(ir_node *node) { + int i, arity; ir_node *cp; - link_node_state_t *cpstate; + node_info *cpstate; cp = exact_copy(node); + + arity = get_irn_arity(node); + + for (i = 0; i < arity; ++i) { + if (is_backedge(node, i)) + set_backedge(cp, i); + } + set_copy(node, cp); - cpstate = XMALLOCZ(link_node_state_t); - cp->link = cpstate; - if (is_Block(cp)) - cp->loop = NULL; /* the copy does not belong to the loop */ - set_irn_visited(cp, current_ir_graph->visited); + cpstate = new_node_info(); + set_irn_link(cp, cpstate); + + if (is_Block(cp)) { + /* TODO + * exact_copy already sets Macroblock. + * Why do we need to do this anyway? */ + set_Block_MacroBlock(cp, cp); + } return cp; } /** - * Peels the loop by copying the contents. Graph needs some rewiring after that. + * This walker copies all walked nodes. + * If the walk_condition is true for a node, it is walked. + * All nodes node_info->copy attributes has to be NULL prior to every to every walk. */ -void peel_walk(ir_node *node) +static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop) { int i; int arity; ir_node *cp; ir_node **cpin; ir_graph *irg = current_ir_graph; - - //(void) env; - - link_node_state_t *nodestate = get_lstate(node); + node_info *node_info = get_node_info(node); /** * break condition and cycle resolver, creating temporary node copies */ - if (node->visited >= irg->visited) - { - if (!nodestate->cloned && !nodestate->temp) - { - /** temporary clone this node - * because we were here before and would walk into a cycle - */ - rawcopy_node(node); - nodestate->temp=1; + if (get_irn_visited(node) >= get_irg_visited(irg)) { + /* Here we rely on nodestate's copy being initialized with NULL */ + DB((dbg, LEVEL_5, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node))); + if (node_info->copy == NULL) { + cp = rawcopy_node(node); + DB((dbg, LEVEL_5, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp))); } return; } - //printf(" ----- WALK %ld ----- \n", node->node_nr); - /** - * WALK - */ - set_irn_visited(node, irg->visited); + /* Walk */ + mark_irn_visited(node); - if ( !is_Block(node) ) { - ir_node *pred = get_irn_n(node, -1); - if (is_in_loop(pred)) - peel_walk(pred); + if (!is_Block(node)) { + ir_node *pred = get_nodes_block(node); + if (walk_condition(pred)) + DB((dbg, LEVEL_5, "walk block %ld\n", get_irn_node_nr(pred))); + copy_walk(pred, walk_condition, set_loop); } arity = get_irn_arity(node); NEW_ARR_A(ir_node *, cpin, arity); - for (i = get_irn_arity(node) - 1; i >= 0; --i) { ir_node *pred = get_irn_n(node, i); - /* collect head entries */ - if ( is_in_head(pred) && !is_in_head(node) ) - { - loop_entry_t entry; - entry.node = node; - entry.pred_irn_n = i; - ARR_APP1(loop_entry_t, head_entries, entry); - } - - if (is_in_loop(pred)) - { - peel_walk(pred); - cpin[i] = get_copy(pred); //get_lstate(pred)->link; - //printf("copy of %ld gets in %ld", node->node_nr, cpin[i]->node_nr); + if (walk_condition(pred)) { + DB((dbg, LEVEL_5, "walk node %ld\n", get_irn_node_nr(pred))); + copy_walk(pred, walk_condition, set_loop); + cpin[i] = get_copy(pred); + DB((dbg, LEVEL_5, "copy of %ld gets new in %ld which is copy of %ld\n", + get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred))); } else { cpin[i] = pred; } - //printf("copy of %ld gets in %ld \n", node->node_nr, cpin[i]->node_nr); } - /** - * copy node / finalize temp node - */ - if (!nodestate->temp) { -// if (!is_Const(node) && !is_SymConst(node)) { - cp = rawcopy_node(node); -// } else { -// cp = node; -// //DBG -// printf("CONST FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr); -// nodestate->link = cp; -// } + /* copy node / finalize temp node */ + if (node_info->copy == NULL) { + /* No temporary copy existent */ + cp = rawcopy_node(node); + DB((dbg, LEVEL_5, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp))); } else { /* temporary copy is existent but without correct ins */ - cp = get_copy(node); // nodestate->link; - //printf("FINALIZE: %ld \n", cp->node_nr); + cp = get_copy(node); + DB((dbg, LEVEL_5, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp))); } - // special treatment for the head/condition: we need 3 heads for a tail-controlled and peeled loop - if (is_in_head(node)) { - // head/condition for the tail-controlled loop - // These copies are linked to the copies - rawcopy_node(cp); - } - - if (!is_Block(node)) - { + if (!is_Block(node)) { ir_node *cpblock = get_copy(get_nodes_block(node)); - /* set the block of the copy to the copied block */ - //printf(" PRE NODE %ld BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr); set_nodes_block(cp, cpblock ); - //printf(" POST NODE %ld BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr); - - /* fix the phi information in attr.phis (does not add the phi node to the block) */ + /* fix the phi information in attr.phis */ if( is_Phi(cp) ) - { add_Block_phi(cpblock, cp); - //printf("PHI-BLOCK block %ld got its phi %ld\n", cpblock->node_nr, cp->node_nr); - } - } - else { - /* macroblock info is not copied */ - set_Block_MacroBlock(cp, cp); } - //dbg valid ins? -// for(i=0; inode_nr, cp->node_nr, cpin[i]->node_nr); - + set_irn_loop(cp, set_loop); set_irn_in(cp, ARR_LEN(cpin), cpin); - -// for(i=0; i< ARR_LEN(cpin); i++) -// { -// printf("ins %ld: %ld \n", cp->node_nr, cpin[i]->node_nr); -// } - -//TODO REM -// if (!nodestate->temp) -// { -// nodestate->link = cp; -// cpstate = XMALLOCZ(link_node_state_t); -// cp->link = cpstate; -// } else { -// /* temporary copy is existent but without correct ins */ -// cp = nodestate->link; -// } - - - nodestate->temp = 0; - nodestate->cloned = 1; } -//void chklink (ir_node *n, void * e) -//{ -// ir_node *link = n->link; -// link_node_state_t *l = (link_node_state_t *)link; -// -// printf("n %ld\n", n->node_nr); -// printf("l p %ld\n", l->link); -// if (l->link) -// printf("l %ld\n", l->link->node_nr); -// -//} - /** * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds */ -void peel(void) +static void peel(out_edge *loop_outs) { int i; ir_node **entry_buffer; int entry_c = 0; - int entry_i; - NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_entries)); + ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); - for(i = 0; i < ARR_LEN(loop_entries); i++) - { - loop_entry_t entry = loop_entries[i]; + NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs)); + + /* duplicate loop walk */ + inc_irg_visited(current_ir_graph); + + for(i = 0; i < ARR_LEN(loop_outs); i++) { + out_edge entry = loop_outs[i]; ir_node *node = entry.node; ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); if (is_Block(node)) { - /* node is block and the given pred points inside the loop */ - ir_node *cppred; - - peel_walk( pred ); - - // leave keepalives out - if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) { - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); - } else { - cppred = get_copy(pred); - //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr); - add_pred( node, cppred ); - //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr); - } - - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); - - //DBG - //phifix(node, cppred); + copy_walk(pred, is_in_loop, NULL); + duplicate_preds(node, entry.pred_irn_n, get_copy(pred) ); } else { - /* node is somewhere in the graph, outside of the loop */ - //ir_node *cppred; - //ir_node *block; - //ir_node *cpblock; - peel_walk( pred ); - - // no ssa for keepalives - if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) { - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); - } else { - //printf("fix entry %ld to %ld\n", node->node_nr, pred->node_nr); + copy_walk(pred, is_in_loop, NULL); + /* leave out keepalives */ + if (!is_End(node)) + /* Node is user of a value defined inside the loop. + * We'll need a phi since we duplicated the loop. */ + /* Cannot construct_ssa here, because it needs another walker. */ entry_buffer[entry_c++] = pred; - } + } + } + + ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); + /* Rewires the 2 heads */ + peel_fix_heads(); - // cannot construct_ssa here, because it needs another walker + /* Generate phis for values from peeled code and original loop */ + construct_ssa_foreach(entry_buffer, entry_c); + /*for(i = 0; i < entry_c; i++) + { + ir_node *cppred, *block, *cpblock, *pred; + + pred = entry_buffer[i]; + cppred = get_copy(pred); + block = get_nodes_block(pred); + cpblock = get_nodes_block(cppred); + construct_ssa(block, pred, cpblock, cppred); + }*/ +} + +/** + * Populates head_entries with (node, pred_pos) tuple + * whereas the node's pred at pred_pos is in the head but not the node itself. + * Head and condition chain blocks must be marked. + */ +static void get_head_outs(ir_node *node, void *env) +{ + int i; + int arity = get_irn_arity(node); + (void) env; - } /* is block */ - } /* for */ + DB((dbg, LEVEL_5, "get head entries %ld \n", get_irn_node_nr(node))); - //irg_walk_graph(current_ir_graph, chklink, NULL, NULL); + for(i = 0; i < arity; ++i) { + /* node is not in the head, but the predecessor is. + * (head or loop chain nodes are marked) */ - fix_head(loop_cf_head); + DB((dbg, LEVEL_5, "... ")); + DB((dbg, LEVEL_5, "node %ld marked %d (0) pred %d marked %d (1) \n", + node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i)))); - //printf (" FIXHEAD DONE :D \n"); + if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) { + out_edge entry; + entry.node = node; + entry.pred_irn_n = i; + DB((dbg, LEVEL_5, + "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n", + get_irn_node_nr(node), i, get_irn_node_nr(node), get_irn_node_nr(get_irn_n(node, i)))); + ARR_APP1(out_edge, cur_head_outs, entry); + } + } +} - entry_i = 0; +/** + * Find condition chains, and add them to be inverted, until the node count exceeds the limit. + * A block belongs to the chain if a condition branches out of the loop. + * Returns 1 if the given block belongs to the condition chain. + */ +static unsigned find_condition_chains(ir_node *block) { + const ir_edge_t *edge; + unsigned mark = 0; + int nodes_n = 0; - /* Generate phis for values from peeled code and original loop */ - for(i = 0; entry_i < entry_c; i++) - { - loop_entry_t entry = loop_entries[i]; - ir_node *node = entry.node; + DB((dbg, LEVEL_5, "condition_chains for block %ld\n", get_irn_node_nr(block))); - if (is_Block(node)) - { - /* block */ - ir_node *phi=get_Block_phis(node); + /* Collect all outs, including keeps. + * (TODO firm function for number of out edges?) */ + foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) { + ++nodes_n; + } + + /* We do not want to collect more nodes from condition chains, than the limit allows us to. + * Also, leave at least one block as body. */ + if (head_inversion_node_count + nodes_n > inversion_head_node_limit + || head_inversion_block_count + 1 == loop_info.blocks) { + set_Block_mark(block, 0); + + return 0; + } + + /* First: check our successors, and add all succs that are outside of the loop to the list */ + foreach_block_succ(block, edge) { + ir_node *src = get_edge_src_irn( edge ); + int pos = get_edge_src_pos( edge ); + + if (!is_in_loop(src)) { + out_edge entry; + + mark = 1; + entry.node = src; + entry.pred_irn_n = pos; + ARR_APP1(out_edge, cond_chain_entries, entry); + mark_irn_visited(src); + } + } + + if (mark == 0) { + /* this block is not part of the chain, + * because the chain would become too long or we have no successor outside of the loop */ + + set_Block_mark(block, 0); + return 0; + } else { + set_Block_mark(block, 1); + ++head_inversion_block_count; + DB((dbg, LEVEL_5, "block %ld is part of condition chain\n", get_irn_node_nr(block))); + head_inversion_node_count += nodes_n; + } + + /* Second: walk all successors, and add them to the list if they are not part of the chain */ + foreach_block_succ(block, edge) { + unsigned inchain; + ir_node *src = get_edge_src_irn( edge ); + int pos = get_edge_src_pos( edge ); + + /* already done cases */ + if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) { + continue; + } + + mark_irn_visited(src); + DB((dbg, LEVEL_5, "condition chain walk %ld\n", get_irn_node_nr(src))); + inchain = find_condition_chains(src); + + /* if successor is not part of chain we need to collect its outs */ + if (!inchain) { + out_edge entry; + entry.node = src; + entry.pred_irn_n = pos; + ARR_APP1(out_edge, cond_chain_entries, entry); + } + } + return mark; +} + +/** + * Rewire the loop head and inverted head for loop inversion. + */ +static void inversion_fix_heads(void) +{ + ir_node **loopheadnins, **invheadnins; + ir_node *loophead = loop_cf_head; + ir_node *invhead = get_copy(loophead); + + int headarity = get_irn_arity(loophead); + ir_node *phi; + int i; - while(phi) - { - add_pred(phi, entry_buffer[entry_i++]); - phi=get_Phi_next(phi); + int lheadin_c = 0; + int iheadin_c = 0; + + int backedges_n = get_backedge_n(loophead, 0); + int lhead_arity = backedges_n; + int ihead_arity = headarity - backedges_n; + + assert(lhead_arity != 0 && "Loophead has arity 0. Probably wrong backedge informations."); + assert(ihead_arity != 0 && "Inversionhead has arity 0. Probably wrong backedge informations."); + + /* new in arrays for all phis in the head blocks */ + NEW_ARR_A(ir_node *, loopheadnins, lhead_arity); + NEW_ARR_A(ir_node *, invheadnins, ihead_arity); + + for_each_phi(loophead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity); + } + for_each_phi(invhead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity); + } + + for (i = 0; i < headarity; i++) { + ir_node *pred = get_irn_n(loophead, i); + + /** + * Rewire the head blocks ins and their phi ins. + * Requires phi list per block. + */ + if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) { + /* just copy these edges */ + loopheadnins[lheadin_c] = pred; + for_each_phi(loophead, phi) { + get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i); } + ++lheadin_c; } else { - /* not block */ + invheadnins[iheadin_c] = pred; + for_each_phi(invhead, phi) { + get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ; + } + ++iheadin_c; + } + } - ir_node *cppred, *block, *cpblock, *pred; + /* assign the ins to the head blocks */ + set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins); + set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins); - /** - * pred = get_irn_n(entry.node, entry.pred_irn_n); - * does not work, because we could have changed the nodes preds in construct_ssa - */ + /* assign the ins for the phis */ + for_each_phi(loophead, phi) { + ir_node **ins = get_node_info(phi)->ins; + set_irn_in(phi, lhead_arity, ins); + } + + for_each_phi(invhead, phi) { + ir_node **ins = get_node_info(phi)->ins; + set_irn_in(phi, ihead_arity, ins); + } +} + +static void inversion_walk(out_edge *head_entries) +{ + int i; + ir_node *phi; + int entry_c = 0; + ir_node **entry_buffer; + ir_node **head_phi_assign; + + NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries)); - pred = entry_buffer[entry_i++]; + head_phi_assign = NEW_ARR_F(ir_node *, 0); - //printf("pred %ld\n", pred->node_nr); - cppred = get_copy(pred); - //printf("cppred %ld\n", cppred->node_nr); - block = get_nodes_block(pred); - //printf("block %ld\n", block->node_nr); - cpblock = get_nodes_block(cppred); - //printf("cpblock %ld\n", cpblock->node_nr); + /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */ + for_each_phi(loop_cf_head , phi) { + for(i=0; ilink = (void *)state; + cur_loop_outs = NEW_ARR_F(out_edge, 0); + irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL ); + + peel(cur_loop_outs); + + /* clean up */ + reset_node_infos(); + + set_irg_doms_inconsistent(current_ir_graph); + set_irg_loopinfo_inconsistent(current_ir_graph); + set_irg_outs_inconsistent(current_ir_graph); + + DEL_ARR_F(cur_loop_outs); } -void free_linkstructs(ir_node *node, void *env) +/* Loop inversion */ +void loop_inversion(void) { - (void) env; - xfree( (link_node_state_t*) node->link); + unsigned do_inversion = 1; + + inversion_head_node_limit = INT_MAX; + + /* Search for condition chains. */ + ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); + + irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL); + + loop_info.blocks = get_loop_n_blocks(cur_loop); + cond_chain_entries = NEW_ARR_F(out_edge, 0); + + head_inversion_node_count = 0; + head_inversion_block_count = 0; + + set_Block_mark(loop_cf_head, 1); + mark_irn_visited(loop_cf_head); + inc_irg_visited(current_ir_graph); + + find_condition_chains(loop_cf_head); + + DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks)); + if (loop_info.blocks < 2) { + do_inversion = 0; + DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks)); + } + + /* We also catch endless loops here, + * because they do not have a condition chain. */ + if (head_inversion_block_count < 1) { + do_inversion = 0; + DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count)); + } + + if (do_inversion) { + cur_head_outs = NEW_ARR_F(out_edge, 0); + + /* Get all edges pointing into the head or condition chain (outs). */ + irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL); + inversion_walk(cur_head_outs); + + DEL_ARR_F(cur_head_outs); + + set_irg_doms_inconsistent(current_ir_graph); + set_irg_loopinfo_inconsistent(current_ir_graph); + set_irg_outs_inconsistent(current_ir_graph); + } + + /* free */ + DEL_ARR_F(cond_chain_entries); + ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); } -void decision_maker(void) +/** + * Returns last element of linked list of copies by + * walking the linked list. + */ +ir_node *get_last_copy(ir_node *node) { - //inc_irg_visited(current_ir_graph); - //loop_walker( loop_entries, NULL, get_invariants, NULL ); + ir_node *copy, *cur; + cur = node; + while ((copy = get_copy(cur))) { + cur = copy; + } + return cur; +} +/** + * Rewire floating copies of the current loop. + */ +void unrolling_fix_cf(void) +{ + ir_node *loophead = loop_cf_head; + int headarity = get_irn_arity(loophead); + ir_node *phi, *headnode; + /*ir_node *high, *low;*/ + int i; - inc_irg_visited(current_ir_graph); - irg_walk_graph(current_ir_graph, alloc_linkstructs, NULL, NULL); + int uhead_in_n = 0; + int backedges_n = get_backedge_n(loophead, 0); + int unroll_arity = backedges_n; - inc_irg_visited(current_ir_graph); - peel(); + /* Create ins for all heads and their phis */ + headnode = get_copy(loophead); + for_each_copy(headnode) { + NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity); + for_each_phi(headnode, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity); + } + } - inc_irg_visited(current_ir_graph); - irg_walk_graph(current_ir_graph, free_linkstructs, NULL, NULL); + /* Append the copies to the existing loop. */ + for (i = 0; i < headarity; i++) { + ir_node *upper_head = loophead; + ir_node *lower_head = get_copy(loophead); + + ir_node *upper_pred = get_irn_n(loophead, i); + ir_node *lower_pred = get_copy(get_irn_n(loophead, i)); + + ir_node *last_pred; + + /** + * Build unrolled loop top down + */ + if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) { + for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) { + get_node_info(lower_head)->ins[uhead_in_n] = upper_pred; + + for_each_phi(upper_head, phi) { + ir_node *phi_copy = get_copy(phi); + get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i); + } + } + + last_pred = upper_pred; + ++uhead_in_n; + + /* Fix the topmost loop heads backedges. */ + set_irn_n(loophead, i, last_pred); + for_each_phi(loophead, phi) { + ir_node *last_phi = get_last_copy(phi); + ir_node *pred = get_irn_n(last_phi, i); + set_irn_n(phi, i, pred); + } + } + } + + headnode = get_copy(loophead); + for_each_copy(headnode) { + set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins); + for_each_phi(headnode, phi) { + set_irn_in(phi, unroll_arity, get_node_info(phi)->ins); + } + } } +#if 0 +static ir_node *add_phi(ir_node *node, int phi_pos) +{ + ir_mode *mode; + ir_node *phi; + ir_node **in; + mode = get_irn_mode(get_irn_n(node, phi_pos)); + ir_node *block = get_nodes_block(node); + int n_cfgpreds = get_irn_arity(block); + ir_node *pred = get_irn_n(node, phi_pos); + int i; + + /* create a new Phi */ + NEW_ARR_A(ir_node*, in, n_cfgpreds); + for(i = 0; i < n_cfgpreds; ++i) + in[i] = new_Unknown(mode); /*pred;*/ + + phi = new_r_Phi(block, n_cfgpreds, in, mode); + + assert(phi && "phi null"); + assert(is_Bad(phi) && "phi bad"); + + /* Important: always keep block phi list up to date. */ + add_Block_phi(block, phi); + /* EVERY node is assumed to have a node_info linked. */ + alloc_node_info(phi, NULL); + + set_irn_n(node, phi_pos, phi); + return phi; +} +#endif + /** - * TODO use list , not arr_F + * Loop unrolling + * Could be improved with variable range informations. */ -void analyze_loop(ir_loop *loop) +void loop_unrolling(void) { - /* Init new for every loop */ - loop_cf_head = NULL; - loop_cf_head_valid = 1; - //loop_entries_n = 0; - backedges_n = 0; - has_sto = 0; + int i, j; - cur_loop = loop; + unroll_times = 8; + + cur_loop_outs = NEW_ARR_F(out_edge, 0); + irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL ); + + ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); + + /* duplicate whole loop content */ + inc_irg_visited(current_ir_graph); - /* arrays */ - //backedges = NEW_ARR_F(loop_entry_t, 0); - //alien_backedges = NEW_ARR_F(loop_entry_t, 0); - //head_edges = NEW_ARR_F(loop_entry_t, 0); + for(i = 0; i < ARR_LEN(cur_loop_outs); i++) { + out_edge entry = cur_loop_outs[i]; + ir_node *node = entry.node; + ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); + + if (!is_Block(node)) { + + for (j = 0; j < unroll_times-1; ++j) { + copy_walk(pred, is_in_loop, cur_loop); + + pred = get_copy(pred); + } + } + } - loop_entries = NEW_ARR_F(loop_entry_t, 0); - head_entries = NEW_ARR_F(loop_entry_t, 0); + for(i = 0; i < ARR_LEN(cur_loop_outs); i++) { + out_edge entry = cur_loop_outs[i]; + ir_node *node = entry.node; + ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); + + /* build linked list of copied nodes/blocks */ + if (is_Block(node)) { + for (j = 0; j < unroll_times-1; ++j) { + copy_walk(pred, is_in_loop, cur_loop); + duplicate_preds(node, entry.pred_irn_n, get_copy(pred)); + + pred = get_copy(pred); + } + } + } - inc_irg_visited( current_ir_graph ); - irg_walk_graph( current_ir_graph, block_phi_walker, NULL, NULL ); + ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); - /* Collect all backedges */ - for_each_loop_block(loop, collect_backedges, NULL ); + /* Line up the floating copies. */ + unrolling_fix_cf(); - /* Find loop entries walk, find head */ - inc_irg_visited( current_ir_graph ); - irg_walk_graph( current_ir_graph, find_loop_entries_walk, NULL, NULL ); + /* Generate phis for all loop outs */ + for(i = 0; i < ARR_LEN(cur_loop_outs); i++) { + out_edge entry = cur_loop_outs[i]; + ir_node *node = entry.node; + ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); + + if (!is_Block(node) && !is_End(node)) { + DB((dbg, LEVEL_1, " construct_ssa_n def %ld node %ld pos %d\n", + get_irn_node_nr(pred), get_irn_node_nr(node), entry.pred_irn_n)); + construct_ssa_n(pred, node); + } + } + + DEL_ARR_F(cur_loop_outs); + + set_irg_doms_inconsistent(current_ir_graph); + set_irg_loopinfo_inconsistent(current_ir_graph); + set_irg_outs_inconsistent(current_ir_graph); +} + +/* Initialization and */ +static void init_analyze(ir_loop *loop) +{ + /* Init new for every loop */ + cur_loop = loop; + + loop_cf_head = NULL; + loop_cf_head_valid = 1; + loop_inv_head = NULL; + loop_peeled_head = NULL; + + loop_info.outs = 0; + loop_info.calls = 0; + loop_info.loads = 0; + loop_info.blocks = 0; + + DB((dbg, LEVEL_2, " >>>> current loop includes node %ld <<<\n", get_irn_node_nr(get_loop_node(loop, 0)))); + + irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL); /* RETURN if there is no valid head */ - if (!loop_cf_head || !loop_cf_head_valid) - { - //DBG printf("NOTE: There is no valid loop head. Nothing done.\n"); + if (!loop_cf_head || !loop_cf_head_valid) { + DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n")); return; } - decision_maker(); + if (enable_peeling) + loop_peeling(); - // TODO free all link states... or better put them on functionstack + if (enable_inversion) + loop_inversion(); + if (enable_unrolling) + loop_unrolling(); - /* FREE */ - DEL_ARR_F(loop_entries); - DEL_ARR_F(head_entries); - //DEL_ARR_F(backedges); - //DEL_ARR_F(alien_backedges); - //DEL_ARR_F(head_edges); +#if 0 + /* RETURN if there is a call in the loop */ + if (loop_info.calls) + return; +#endif - //dump_ir_block_graph(current_ir_graph, "-lu1"); + DB((dbg, LEVEL_2, " <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0)))); } -/** - * Find most inner loops and send them to analyze_loop - */ -void analyze_inner_loop(ir_loop *loop) +/* Find most inner loops and send them to analyze_loop */ +static void find_most_inner_loop(ir_loop *loop) { /* descend into sons */ int sons = get_loop_n_sons(loop); - //printf("found %d loops \n", sons); + if (sons == 0) { + loop_element elem; + int el_n, i; - if (sons==0) - { - //printf("analyze loop %ld\n", loop->loop_nr); - analyze_loop(loop); - } - else - { + el_n = get_loop_n_elements(loop); + + for (i=0; i < el_n; ++i) { + elem = get_loop_element(loop, i); + /* We can only rely on the blocks, + * as the loop attribute of the nodes seems not to be set. */ + if (is_ir_node(elem.kind) && is_Block(elem.node)) { + ARR_APP1(ir_node *, loops, elem.node); + DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node)); + break; + } + } + } else { int s; - for(s=0; sloop_nr); - analyze_inner_loop( get_loop_son(loop, s) ); + for(s=0; spass, name ? name : "loop_unroll", -// loop_unroll_wrapper); -//} + DB((dbg, LEVEL_2, " >>> unrolling (Startnode %ld) <<<\n", + get_irn_node_nr(get_irg_start(irg)))); -/* -void firm_init_loopunroll(void) { - FIRM_DBG_REGISTER(dbg, "firm.opt.loopunroll"); -}*/ + loop_optimization(irg); + + DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %ld) <<<\n", + get_irn_node_nr(get_irg_start(irg)))); +} + +void do_loop_inversion(ir_graph *irg) +{ + enable_unrolling = 0; + enable_peeling = 0; + enable_inversion = 1; + + DB((dbg, LEVEL_2, " >>> inversion (Startnode %ld) <<<\n", + get_irn_node_nr(get_irg_start(irg)))); + + loop_optimization(irg); + + DB((dbg, LEVEL_2, " >>> inversion done (Startnode %ld) <<<\n", + get_irn_node_nr(get_irg_start(irg)))); +} + +void do_loop_peeling(ir_graph *irg) +{ + enable_unrolling = 0; + enable_peeling = 1; + enable_inversion = 0; + + DB((dbg, LEVEL_2, " >>> peeling (Startnode %ld) <<<\n", + get_irn_node_nr(get_irg_start(irg)))); + + loop_optimization(irg); + + DB((dbg, LEVEL_2, " >>> peeling done (Startnode %ld) <<<\n", + get_irn_node_nr(get_irg_start(irg)))); + +} + +void firm_init_loop_opt(void) +{ + FIRM_DBG_REGISTER(dbg, "firm.opt.loop"); +}