X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Floop.c;h=39df5e583a2ed10d72f4c522af7844626f34196e;hb=a619ce99e40de4eb4481a590970a881e9f24627a;hp=b0d0f8f656f601d66409405459d2739fd602d9d9;hpb=47f788a47e0158da3a8a755c4a5bbd1653832465;p=libfirm diff --git a/ir/opt/loop.c b/ir/opt/loop.c index b0d0f8f65..39df5e583 100644 --- a/ir/opt/loop.c +++ b/ir/opt/loop.c @@ -19,344 +19,310 @@ /** * @file - * @brief Loop peeling and unrolling * @author Christian Helmer + * @brief loop inversion and loop unrolling, loop peeling + * * @version $Id$ */ +#include "config.h" -//#include "config.h" - -//#include -#include - +#include "iroptimize.h" +#include "opt_init.h" #include "irnode.h" -#include "irnode_t.h" -#include "irgraph_t.h" -//#include "irprog_t.h" +#include "debug.h" -//#include "iroptimize.h" -#include "ircons_t.h" -#include "iropt_t.h" +#include "ircons.h" #include "irgopt.h" -//#include "irgmod.h" +#include "irgmod.h" #include "irgwalk.h" - -//#include "array_t.h" -#include "list.h" -//#include "pset.h" -//#include "pmap.h" -//#include "pdeq.h" -//#include "xmalloc.h" -//#include "pqueue.h" - #include "irouts.h" -#include "irloop_t.h" -#include "irbackedge_t.h" -//#include "opt_inline_t.h" -//#include "cgana.h" -//#include "trouts.h" -//#include "error.h" - -//#include "analyze_irg_args.h" -#include "iredges_t.h" -//#include "irflag_t.h" -//#include "irhooks.h" +#include "iredges.h" #include "irtools.h" -//#include "iropt_dbg.h" -#include "irpass_t.h" -#include "irloop.h" - #include "array_t.h" -#include "irdump.h" +#include "beutil.h" +#include "irloop_t.h" +#include "irpass.h" +DEBUG_ONLY(static firm_dbg_module_t *dbg); -/* convenience macro iterating over every phi node of the block */ +/** + * Convenience macro for iterating over every phi node of the given block. + * Requires phi list per block. + */ #define for_each_phi(block, phi) \ for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) ) -ir_loop *cur_loop; +/* current loop */ +static ir_loop *cur_loop; -/* The loop walker should be possible to abort if nothing can be done anymore */ +/* abortable walker function */ typedef unsigned irg_walk_func_abortable(ir_node *, void *); -/* stores pair of node and number for nodes predecessor */ -typedef struct loop_entry_t { - ir_node *node; /* node outside of the loop */ - int pred_irn_n; /* with pred_irn_n pointing inside loop */ -} loop_entry_t; - -/* Store complex values in the nodes link */ -//TODO optimize. Every node has these values and seldom many otm are used. -typedef struct link_node_state_t { - unsigned cloned:1; - unsigned temp:1; +/* condition for walking a node during a copy_walk */ +typedef unsigned walker_condition(ir_node *); + +/* node and position of a predecessor */ +typedef struct out_edges { + ir_node *node; + int pred_irn_n; +} out_edge; + +/* access complex values through the nodes links */ +typedef struct node_info { unsigned invariant:1; - ir_node *link; - ir_node *ssalink; /* we will have to keep the link to the copies, as well as have temporary links for ssa creation */ - ir_node **ins; /* ins for phi nodes, during rewiring of blocks */ - // TODO omit ins. can be replaced by new ins and newunknown ins for each -} link_node_state_t; - - -loop_entry_t *loop_entries; /* loop entries (from below) in the node graph */ -loop_entry_t *backedges; /* backedges exclusively from the current loop */ -loop_entry_t *alien_backedges; /* The head can be head of several loops. */ -loop_entry_t *head_edges; /* The head can be head of several loops. */ - -ir_node *loop_cf_head = NULL; /* loop exit in the node graph */ -unsigned loop_cf_head_valid = 1; /* a loop may/must have one head, otherwise invalid */ - -unsigned has_sto = 0; /* If we store inside the loop we might - * have disambiguation problems */ -//DBG -//void arrdump(ir_node **arr) -//{ -// int i; -// for (i=0; inode_nr), is_Block(arr[i])); -// } -//} + ir_node *copy; + ir_node *link; /* temporary links for ssa creation */ + ir_node **ins; /* ins for phi nodes, during rewiring of blocks */ + unsigned done; + struct node_info *freelistnext; /* linked list to free all node_infos */ +} node_info; + +static node_info *link_node_state_list; /* head of the linked list to free all node_infos */ + +static out_edge *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */ +static out_edge *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */ + +static ir_node *loop_cf_head = NULL; /* Loop head node */ +static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */ + +ir_node **loops; + +/* Inverted head */ +static ir_node *loop_inv_head = NULL; +/* Peeled head */ +static ir_node *loop_peeled_head = NULL; + +/* Loop analysis informations */ +typedef struct loop_info_t { + unsigned blocks; /* number of blocks in the loop */ + unsigned calls; /* number of calls */ + unsigned loads; /* number of load nodes */ + unsigned outs; /* outs without keepalives */ +#if 0 + unsigned invariant_loads; + unsigned stores; /* number of store nodes */ + unsigned opnodes_n; /* nodes that probably result in an instruction */ + unsigned do_invariant_opt; +#endif +} loop_info_t; + +/* Information about the current loop */ +static loop_info_t loop_info; + +/* A walker may start visiting a condition chain with these nodes. */ +static out_edge *cond_chain_entries; + +/* Number of unrolling */ +int unroll_times; + +static unsigned head_inversion_node_count; +static unsigned inversion_head_node_limit; +static unsigned head_inversion_block_count; + +static unsigned enable_peeling; +static unsigned enable_inversion; +static unsigned enable_unrolling; /** - * Returns the state of the given node. + * + * ============= AUXILIARY FUNCTIONS ===================================== */ -link_node_state_t *get_lstate(ir_node *n) -{ - return ((link_node_state_t *)n->link); -} + /** - * Returns the link inside of the nodes state which is pointing to its copy - * most of the time during loop peeling. + * Creates object on the heap, and adds it to a linked list to free it later. */ -ir_node *get_copy_of(ir_node *n) +static node_info *new_node_info(void) { - return ((link_node_state_t *)n->link)->link; + node_info *l = XMALLOCZ(node_info); + l->freelistnext = link_node_state_list; + link_node_state_list = l; + l->copy = NULL; + l->invariant = 0; + return l; } -/** - * Returns true if the node or block is in cur_loop. - */ -unsigned is_in_loop(ir_node *node) +static node_info *get_node_info(ir_node *n) { -// if (is_Block(node)) { -// if (node->loop == cur_loop) { -// printf(" INLOOP %ld \n", node->node_nr); -// } -// return (node->loop == cur_loop); -// } else { -// if ( get_nodes_block(node)->loop == cur_loop ) { -// printf(" INLOOP %ld \n", node->node_nr); -// } -// return ( get_nodes_block(node)->loop == cur_loop ); -// } - if (is_Block(node)) { - return (node->loop == cur_loop); - } else { - return ( get_nodes_block(node)->loop == cur_loop ); + return ((node_info *)get_irn_link(n)); +} + +/* Allocates a node_info struct for the given node. For use with a walker. */ +static void alloc_node_info(ir_node *node, void *env) +{ + node_info *state; + (void) env; + state = new_node_info(); + set_irn_link(node, (void *)state); +} + +static void free_node_info(void) +{ + int a = 0; + node_info *n; + n = link_node_state_list; + while (n) { + node_info *next = n->freelistnext; + ++a; + xfree(n); + n = next; } + link_node_state_list = NULL; } /** - * Returns if the given be is an alien edge + * Use the linked list to reset the reused values of all node_infos + * Reset in particular the copy attribute as copy_walk uses it to determine a present copy */ -unsigned is_alien_edge(ir_node *n, int i) +static void reset_node_infos(void) { - return( !is_in_loop( get_irn_n( n, i ) ) ); + node_info *next; + next = link_node_state_list; + while (next->freelistnext) { + node_info *cur = next; + next = cur->freelistnext; + cur->copy = NULL; + cur->ins = NULL; + cur->link = NULL; + } } -static void add_pred(ir_node* node, ir_node* x) +/* Returns the nodes node_info link. */ +static ir_node *get_link(ir_node *n) { - ir_node** ins; - int n; - int i; - -// if(!node) -// printf("NONODE\n"); - - //printf("addpred %ld pred %ld \n", node->node_nr, x->node_nr); - - // WHY limit it to blocks and phi? - //assert(is_Block(node) || is_Phi(node)); - - n = get_irn_arity(node); - NEW_ARR_A(ir_node*, ins, n + 1); - for (i = 0; i < n; i++) - ins[i] = get_irn_n(node, i); - ins[n] = x; - set_irn_in(node, n + 1, ins); + return ((node_info *)get_irn_link(n))->link; } -void block_phi_walker(ir_node *n, void *env) +/* Sets the nodes node_info link. */ +static void set_link(ir_node *n, ir_node *link) { - const ir_edge_t *edge; - (void) env; - - /* RETURN */ - if (!is_Block(n)) - return; + ((node_info *)get_irn_link(n))->link = link; +} - /* generate phi list for every block */ - n->attr.block.phis = NULL; +/* Returns a nodes copy. */ +static ir_node *get_copy(ir_node *n) +{ + return ((node_info *)get_irn_link(n))->copy; +} - foreach_out_edge(n, edge) { - ir_node *src = get_edge_src_irn(edge); - if (is_Phi(src)) - { - //printf("%ld has phi %ld \n", block->node_nr, src->node_nr); - add_Block_phi(n, src); - } - } +/* Sets a nodes copy. */ +static void set_copy(ir_node *n, ir_node *copy) +{ + ((node_info *)get_irn_link(n) )->copy = copy; } /** - * Calls func() for every block in the given loop. + * Convenience macro for iterating over every copy in a linked list + * of copies. */ -void for_each_loop_block(ir_loop *loop, irg_walk_func *func, void *env) -{ - int elements, e; - elements = get_loop_n_elements(loop); - - for(e=0; enode_nr); - func(elem.node, env); - } - } -} +#define for_each_copy(node) \ + for ( ; (node) ; (node) = get_copy(node)) /** - * collects the blocks backedges and creates the phi list for every block + * Convenience macro for iterating over every copy in 2 linked lists + * of copies in parallel. */ -void collect_backedges(ir_node *block, void *env) -{ - (void) env; +#define for_each_copy2(high1, low1, high2, low2) \ + for ( ; (low1) && (low2); (high1) = (low1), (low1) = get_copy(low1), \ + (high2) = (low2), (low2) = get_copy(low2)) - printf("LOOP BLOCK %ld\n", block->node_nr); +/* + * Returns 0 if the node or block is not in cur_loop. + */ +static unsigned is_in_loop(ir_node *node) +{ + return (get_irn_loop(get_block(node)) == cur_loop); +} - /* collect backedges */ - if (has_backedges(block)) - { - int i; - int arity = get_irn_arity(block); +/* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */ +static unsigned is_alien_edge(ir_node *n, int i) +{ + return(!is_in_loop(get_irn_n(n, i))); +} - for(i = 0; i < arity; ++i) { - ir_node *pred = get_irn_n(block, i); +/* used for block walker */ +static void reset_block_mark(ir_node *node, void * env) +{ + (void) env; - loop_entry_t be; - be.node = block; - be.pred_irn_n = i; + if (is_Block(node)) + set_Block_mark(node, 0); +} - ARR_APP1(loop_entry_t, head_edges, be); +static unsigned is_nodesblock_marked(ir_node* node) +{ + return (get_Block_mark(get_block(node))); +} - if (is_backedge(block, i) ) - { - if ( is_in_loop(pred) ) { - //printf("be: %ld --> %ld \n", block->node_nr, pred->node_nr); - ARR_APP1(loop_entry_t, backedges, be); - } else { - //printf("alien be: %ld --> %ld \n", block->node_nr, pred->node_nr); - ARR_APP1(loop_entry_t, alien_backedges, be); - } - } -// else { -// if ( !is_in_loop(pred) ) { -// ARR_APP1(loop_entry_t, head_edges, be); -// } +/* Returns the number of blocks in a loop. */ +static int get_loop_n_blocks(ir_loop *loop) +{ + int elements, e; + int blocks = 0; + elements = get_loop_n_elements(loop); - } + for (e=0; evisited); + NEW_ARR_A(ir_node*, ins, block_arity + 1); - if (node->op != op_Block) { - ir_node *pred = get_irn_n(node, -1); - if (pred->visited < irg->visited) - { - stop = loop_walker_rec(pred, pre, post, env); - if (stop) - return stop; - } + for (i = 0; i < block_arity; ++i) { + ins[i] = get_irn_n(block, i); } + ins[block_arity] = newpred; - for (i = get_irn_arity(node) - 1; i >= 0; --i) { - ir_node *pred = get_irn_n(node, i); - if (pred->visited < irg->visited) - { - stop = loop_walker_rec(pred, pre, post, env); - if (stop) - return stop; - } - } + set_irn_in(block, block_arity + 1, ins); - if (post) - return post(node, env); - return 0; -} + /* LDBG 1 */ +#if 1 + for_each_phi(block, phi) { + int phi_arity = get_irn_arity(phi); + DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %N\n", phi)); -/** - * Walks through loop nodes. - * The entries of the loop (all edges pointing into the loop) have to be given. - */ -unsigned loop_walker(loop_entry_t *entries, - irg_walk_func_abortable *pre, irg_walk_func_abortable *post, void * env) -{ - int i; - int stop = 0; - - for (i=0; !stop && ilink = (void *)state; - - int i, arity; arity = get_irn_arity(node); for (i = 0; i < arity; i++) { ir_node *pred = get_irn_n(node, i); @@ -364,147 +330,59 @@ void find_loop_entries_walk(ir_node *node, void *env) pred_in_loop = is_in_loop(pred); node_in_loop = is_in_loop(node); - //Find the loops head/the blocks with cfpred outside of the loop - if (is_Block(node) && node_in_loop - && !pred_in_loop && loop_cf_head_valid) - { + /* collect some loop information */ + if (node_in_loop) { + if (is_Call(node)) + ++loop_info.calls; + } + + /* Find the loops head/the blocks with cfpred outside of the loop */ + if (is_Block(node) && node_in_loop && !pred_in_loop && loop_cf_head_valid) { ir_node *cfgpred = get_Block_cfgpred(node, i); - if ( !is_in_loop(cfgpred) ) - { - //another head? We do not touch this. - if (loop_cf_head && loop_cf_head != node) - { + if (!is_in_loop(cfgpred)) { + DB((dbg, LEVEL_5, "potential head %+F because inloop and pred %+F not inloop\n", node, pred)); + /* another head? We do not touch this. */ + if (loop_cf_head && loop_cf_head != node) { loop_cf_head_valid = 0; - } - else - { + } else { loop_cf_head = node; } } } - - if ( pred_in_loop && !node_in_loop ) - { - /* we walked right into the loop. */ - loop_entry_t entry; - entry.node = node; - entry.pred_irn_n = i; - - //DBG -// printf("inloop: %ld --> inloop %ld (@ %d) \n", -// node->node_nr, pred->node_nr, i); - - ARR_APP1(loop_entry_t, loop_entries, entry); - } } } -// TODO needed? -///** -// * Finds invariant nodes and marks them as invariant. -// * (Post walk) -// */ -//unsigned get_invariants(ir_node *node, void *env) -//{ -// unsigned invar = 1; -// (void) env; -// -// if (is_Store(node)) -// { -// has_sto = 1; -// /* RETURN and abort walker */ -// return 1; -// } -// -// int arity = get_irn_arity(node); -// -// /* RETURN, no preds to visit */ -// if (arity == 0) return 0; -// -// if (is_Load(node)) -// { -// assert(arity>=2 && "expected load to have edge nr 1 (address)"); -// -// ir_node *pred = get_irn_n(node, 1); -// if (!is_in_loop(pred) /* Everything outside the loop is considered invariant */ -// || is_Const(pred) /* This is not true, but we also want the quasi-invariants. */ -// || is_SymConst(pred) -// || get_lstate(node)->invariant) -// { -// //printf("## CONSTLOAD: %ld \n", node->node_nr); -// get_lstate(node)->invariant = 1; -// } else -// { -// get_lstate(node)->invariant = 0; -// } -// } -// else -// { -// int i; -// invar = 1; -// /* find loop variant preds */ -// for(i = 0; i < arity; ++i) -// { -// ir_node *pred = get_irn_n(node, i); -// -// if ( !(!is_in_loop(pred) /* outside loop is loop invariant */ -// || is_Const(pred) /* constants */ -// || is_SymConst(pred) /* SymConst, if no Store */ -// || get_lstate(node)->invariant /* pred is marked as invariant */ -// ) ) -// { -// invar = 0; -// } -// } -// -// if (invar) { -// printf("const: %ld \n", node->node_nr); -// get_lstate(node)->invariant = 1; -// } else { -// get_lstate(node)->invariant = 0; -// } -//// DBG -//// if (!is_nodes_block_marked(pred)) { -//// //printf("pred outloop: %ld, pred %ld (const)\n", node->node_nr, pred->node_nr); -//// } else if (is_Const(pred) || is_SymConst(pred)) // || is_Phi(pred)) { -//// //printf("predconst: %ld, pred %ld CONST\n", node->node_nr, pred->node_nr); -//// } else if (pred->link == MARKED_CONST) { -//// //printf("predmarked: %ld, pred %ld const\n", node->node_nr, pred->node_nr); -//// } else { -//// mark=0; -//// } -// } -// return 0; -//} - -////TODO DBG Remove -void phifix(ir_node *node, ir_node *newpred) +/* Adds all nodes pointing into the loop to loop_entries and also finds the loops head */ +static void get_loop_outs(ir_node *node, void *env) { - ir_node *phi=get_Block_phis(node); - while(phi) - { - int pa = get_irn_arity(phi); - int ba = get_irn_arity(node); + unsigned node_in_loop, pred_in_loop; + int i, arity; + (void) env; + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + ir_node *pred = get_irn_n(node, i); + pred_in_loop = is_in_loop(pred); + node_in_loop = is_in_loop(node); - while(ba>pa) - { - printf("!!!!!!!!!! block has %d, phi had %d\n", ba, pa ); - add_pred(phi, newpred); - pa++; - printf("!!!!!!!!!! block has %d, phi has now %d\n", ba, pa ); + if (pred_in_loop && !node_in_loop) { + out_edge entry; + entry.node = node; + entry.pred_irn_n = i; + ARR_APP1(out_edge, cur_loop_outs, entry); } - phi=get_Phi_next(phi); } } static ir_node *ssa_second_def; static ir_node *ssa_second_def_block; -static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, - int first) +/** + * Walks the graph bottom up, searching for definitions and creates phis. + */ +static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode) { int i; int n_cfgpreds; @@ -512,22 +390,22 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, ir_node *phi; ir_node **in; - /* This is needed because we create bads sometimes */ - if (is_Bad(block)) + DB((dbg, LEVEL_5, "ssa search_def_and_create_phis: block %N\n", block)); + + /* Prevents creation of phi that would be bad anyway. + * Dead and bad blocks. */ + if (get_irn_arity(block) < 1 || is_Bad(block)) return new_Bad(); - /* the other defs can't be marked for cases where a user of the original - * value is in the same block as the alternative definition. - * In this case we mustn't use the alternative definition. - * So we keep a flag that indicated wether we walked at least 1 block - * away and may use the alternative definition */ - if (block == ssa_second_def_block && !first) { + if (block == ssa_second_def_block) { + DB((dbg, LEVEL_5, "ssa found second definition: use second def %N\n", ssa_second_def)); return ssa_second_def; } /* already processed this block? */ if (irn_visited(block)) { - ir_node *value = get_lstate(block)->ssalink; + ir_node *value = get_link(block); + DB((dbg, LEVEL_5, "ssa already visited: use linked %N\n", value)); return value; } @@ -538,29 +416,43 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, n_cfgpreds = get_Block_n_cfgpreds(block); if (n_cfgpreds == 1) { ir_node *pred_block = get_Block_cfgpred_block(block, 0); - ir_node *value = search_def_and_create_phis(pred_block, mode, 0); + ir_node *value; + + DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %N\n", pred_block)); - get_lstate(block)->ssalink = value; - //set_irn_link(block, value); + value = search_def_and_create_phis(pred_block, mode); + set_link(block, value); mark_irn_visited(block); + return value; } /* create a new Phi */ NEW_ARR_A(ir_node*, in, n_cfgpreds); - for(i = 0; i < n_cfgpreds; ++i) + for (i = 0; i < n_cfgpreds; ++i) in[i] = new_Unknown(mode); phi = new_r_Phi(block, n_cfgpreds, in, mode); - //set_irn_link(block, phi); - get_lstate(block)->ssalink = phi; + + /* Important: always keep block phi list up to date. */ + add_Block_phi(block, phi); + /* EVERY node is assumed to have a node_info linked. */ + alloc_node_info(phi, NULL); + + DB((dbg, LEVEL_5, "ssa phi creation: link new phi %N to block %N\n", phi, block)); + + set_link(block, phi); mark_irn_visited(block); /* set Phi predecessors */ - for(i = 0; i < n_cfgpreds; ++i) { + for (i = 0; i < n_cfgpreds; ++i) { + ir_node *pred_val; ir_node *pred_block = get_Block_cfgpred_block(block, i); - ir_node *pred_val = search_def_and_create_phis(pred_block, mode, 0); + assert(pred_block != NULL); + pred_val = search_def_and_create_phis(pred_block, mode); + assert(pred_val != NULL); + DB((dbg, LEVEL_5, "ssa phi pred:phi %N, pred %N\n", phi, pred_val)); set_irn_n(phi, i, pred_val); } return phi; @@ -569,7 +461,7 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, /** * Given a set of values this function constructs SSA-form for the users of the * first value (the users are determined through the out-edges of the value). - * Uses the irn_visited flags. Works without using the dominance tree. + * Works without using the dominance tree. */ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, ir_node *second_block, ir_node *second_val) @@ -579,16 +471,20 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, const ir_edge_t *edge; const ir_edge_t *next; + assert(orig_block && orig_val && second_block && second_val && + "no parameter of construct_ssa may be NULL"); + /* no need to do anything */ if (orig_val == second_val) return; irg = get_irn_irg(orig_val); + + ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED); inc_irg_visited(irg); mode = get_irn_mode(orig_val); - get_lstate(orig_block)->ssalink = orig_val; - //set_irn_link(orig_block, orig_val); + set_link(orig_block, orig_val); mark_irn_visited(orig_block); ssa_second_def_block = second_block; @@ -605,586 +501,1115 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, if (is_End(user)) continue; - //DB((dbg, LEVEL_3, ">>> Fixing user %+F (pred %d == %+F)\n", user, j, get_irn_n(user, j))); + DB((dbg, LEVEL_5, "original user %N\n", user)); if (is_Phi(user)) { ir_node *pred_block = get_Block_cfgpred_block(user_block, j); - newval = search_def_and_create_phis(pred_block, mode, 1); + newval = search_def_and_create_phis(pred_block, mode); } else { - newval = search_def_and_create_phis(user_block, mode, 1); + newval = search_def_and_create_phis(user_block, mode); } - /* don't fix newly created Phis from the SSA construction */ - if (newval != user) { - //DB((dbg, LEVEL_4, ">>>> Setting input %d of %+F to %+F\n", j, user, newval)); + /* If we get a bad node the user keeps the original in. No second definition needed. */ + if (newval != user && !is_Bad(newval)) set_irn_n(user, j, newval); + } + + ir_free_resources(irg, IR_RESOURCE_IRN_VISITED); +} + +/* + * Construct SSA for def and all of its copies. + */ +static void construct_ssa_n(ir_node *def, ir_node *user) +{ + ir_graph *irg; + ir_mode *mode; + ir_node *iter = def; + const ir_edge_t *edge; + const ir_edge_t *next; + irg = get_irn_irg(def); + + ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED); + inc_irg_visited(irg); + + mode = get_irn_mode(def); + + for_each_copy(iter) { + set_link(get_nodes_block(iter), iter); + mark_irn_visited(get_nodes_block(iter)); + + DB((dbg, LEVEL_5, "ssa_n: Link def %N to block %N\n", + iter, get_nodes_block(iter))); + } + + /* Need to search the outs, because we need the in-pos on the user node. */ + foreach_out_edge_safe(def, edge, next) { + ir_node *edge_user = get_edge_src_irn(edge); + int edge_src = get_edge_src_pos(edge); + ir_node *user_block = get_nodes_block(user); + ir_node *newval; + + if (edge_user != user) + continue; + + if (is_Phi(user)) { + ir_node *pred_block = get_Block_cfgpred_block(user_block, edge_src); + newval = search_def_and_create_phis(pred_block, mode); + } else { + newval = search_def_and_create_phis(user_block, mode); } + + if (newval != user && !is_Bad(newval)) + set_irn_n(user, edge_src, newval); } + + ir_free_resources(irg, IR_RESOURCE_IRN_VISITED); } +/** + * Construct SSA for all definitions in arr. + */ +static void construct_ssa_foreach(ir_node **arr, int arr_n) +{ + int i; + for (i = 0; i < arr_n ; ++i) { + ir_node *cppred, *block, *cpblock, *pred; + + pred = arr[i]; + cppred = get_copy(pred); + block = get_nodes_block(pred); + cpblock = get_nodes_block(cppred); + construct_ssa(block, pred, cpblock, cppred); + } +} +/* get the number of backedges without alien bes */ +static int get_backedge_n(ir_node *loophead, unsigned with_alien) +{ + int i; + int be_n = 0; + int arity = get_irn_arity(loophead); + for (i = 0; i < arity; ++i) { + ir_node *pred = get_irn_n(loophead, i); + if (is_backedge(loophead, i) && (with_alien || is_in_loop(pred))) + ++be_n; + } + return be_n; +} /** - * Rewires the heads after peeling + * Rewires the heads after peeling. */ -void fix_head(ir_node *loophead) +static void peel_fix_heads(void) { + ir_node **loopheadnins, **peelheadnins; + ir_node *loophead = loop_cf_head; + ir_node *peelhead = get_copy(loophead); + int headarity = get_irn_arity(loophead); - int i; - ir_node **loopheadnins; - ir_node **peelheadnins; ir_node *phi; - ir_node *peelhead = get_copy_of(loophead); + int i; + int lheadin_c = 0; int pheadin_c = 0; - /** - * the loopheads new preds are: - * its own backedge(s) and the former backedge(s) of the peeled code - */ - int lhead_arity = 2 * ARR_LEN(backedges); - int phead_arity = headarity - ARR_LEN(backedges); + int backedges_n = get_backedge_n(loophead, 0); + + int lhead_arity = 2 * backedges_n; + int phead_arity = headarity - backedges_n; + /* new in arrays */ NEW_ARR_A(ir_node *, loopheadnins, lhead_arity ); NEW_ARR_A(ir_node *, peelheadnins, phead_arity ); - phi = get_Block_phis(loophead); - while(phi) { - NEW_ARR_A(ir_node *, get_lstate(phi)->ins, lhead_arity); - phi=get_Phi_next(phi); + for_each_phi(loophead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity); } - - phi = get_Block_phis(peelhead); - while(phi) - { - NEW_ARR_A(ir_node *, get_lstate(phi)->ins, phead_arity); - phi=get_Phi_next(phi); + for_each_phi(peelhead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity); } for (i = 0; i < headarity; i++) { - ir_node *phi; ir_node *orgjmp = get_irn_n(loophead, i); - ir_node *copyjmp = get_copy_of(orgjmp); + ir_node *copyjmp = get_copy(orgjmp); /** * Rewire the head blocks ins and their phi ins. - * Requires blocks phi list. - * - * 1. Alien bes origin from the peeled head (new head of the whole loop) - * 2. Loops own bes must be kept/copied to the loophead. - * 3. All other edges origin from the peeled head (new head of the loop) + * Requires phi list per block. */ + if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) { + loopheadnins[lheadin_c] = orgjmp; + for_each_phi(loophead, phi) { + get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ; + } + ++lheadin_c; + /* former bes of the peeled code origin now from the loophead */ + loopheadnins[lheadin_c] = copyjmp; - //printf("head i %d\n", i); - - if (is_backedge(loophead, i)) - { - if (is_alien_edge(loophead, i)) { - peelheadnins[pheadin_c] = orgjmp; /* alien bes go to the peeled head */ - //set_backedge(peelhead, pheadin_c); - - // alien bes origin at the peeled head - for_each_phi(peelhead, phi) - { - //printf("alienbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr); - get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i); - } - //printf("alienbe %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr); - ++pheadin_c; - } else { - loopheadnins[lheadin_c] = orgjmp; /* keep/copy the loops own bes */ - //set_backedge(loophead, lheadin_c); - - for_each_phi(loophead, phi) { - //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr); - get_lstate( phi )->ins[lheadin_c] = get_irn_n(phi, i); - } - //printf("normalbe %ld @ %d -> add to loophead orgjump %ld\n", loophead->node_nr, i, orgjmp->node_nr); - ++lheadin_c; - - loopheadnins[lheadin_c] = copyjmp; /* former bes of the peeled code origin now from the loophead */ - //set_not_backedge(loophead, lheadin_c); - - /* get_irn_n( get_copy_of(phi), i) get_copy_of(get_irn_n( phi, i)) - * Order is crucial! Preds outside of the loop are non existent, like Const. - */ - for_each_phi(loophead, phi) { - //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( get_copy_of(phi), i)->node_nr); - get_lstate( phi )->ins[lheadin_c] = get_irn_n( get_copy_of(phi), i) ; - } - //printf("normalbe %ld @ %d -> add to loophead copyjump %ld\n", loophead->node_nr, i, copyjmp->node_nr); - ++lheadin_c; + /* get_irn_n( get_copy_of(phi), i ) get_copy_of( get_irn_n( phi, i) ) + * Order is crucial! Predecessors outside of the loop are non existent. + * The copy (cloned with its ins!) has pred i, + * but phis pred i might not have a copy of itself. + */ + for_each_phi(loophead, phi) { + get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ; } + ++lheadin_c; } else { peelheadnins[pheadin_c] = orgjmp; - //set_not_backedge(peelhead, pheadin_c); - for_each_phi(peelhead, phi) { - //printf("edge phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( phi, i)->node_nr); - get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i); + get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i); } - //printf("edge %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr); ++pheadin_c; } }/* for */ -// printf("pheadin %d arr %d lheadin %d arr %d \n", -// pheadin_c, ARR_LEN(peelheadnins), -// lheadin_c, ARR_LEN(loopheadnins)); - assert(pheadin_c == ARR_LEN(peelheadnins) && lheadin_c == ARR_LEN(loopheadnins) && - "the number of head elements does not match the predefined one"); + "the constructed head arities do not match the predefined arities"); + /* assign the ins to the nodes */ set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins); set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins); for_each_phi(loophead, phi) { - ir_node **ins = get_lstate( phi )->ins; + ir_node **ins = get_node_info( phi )->ins; set_irn_in(phi, lhead_arity, ins); } for_each_phi(peelhead, phi) { - ir_node **ins = get_lstate( phi )->ins; + ir_node **ins = get_node_info( phi )->ins; set_irn_in(phi, phead_arity, ins); } } +/** + * Create a raw copy (ins are still the old ones) of the given node. + * We rely on copies to be NOT visited. + */ +static ir_node *rawcopy_node(ir_node *node) +{ + int i, arity; + ir_node *cp; + node_info *cpstate; + + cp = exact_copy(node); + + arity = get_irn_arity(node); + + for (i = 0; i < arity; ++i) { + if (is_backedge(node, i)) + set_backedge(cp, i); + } + + set_copy(node, cp); + cpstate = new_node_info(); + set_irn_link(cp, cpstate); + + + if (is_Block(cp)) { + /* TODO + * exact_copy already sets Macroblock. + * Why should we do this anyway? */ + set_Block_MacroBlock(cp, cp); + } + + return cp; +} /** - * Peels the loop by copying the contents. Graph needs some rewiring after that. + * This walker copies all walked nodes. + * If the walk_condition is true for a node, it is walked. + * All nodes node_info->copy attributes have to be NULL prior to every walk. */ -void peel_walk(ir_node *node, void *env) +static void copy_walk(ir_node *node, walker_condition *walk_condition, ir_loop *set_loop) { int i; int arity; ir_node *cp; ir_node **cpin; ir_graph *irg = current_ir_graph; - link_node_state_t *cpstate; - (void) env; - - link_node_state_t *nodestate = get_lstate(node); + node_info *node_info = get_node_info(node); /** * break condition and cycle resolver, creating temporary node copies */ - if (node->visited >= irg->visited) - { - if (!nodestate->cloned && !nodestate->temp) - { - /** temporary clone this node - * because we were here before and would walk into a cycle - */ - cp = exact_copy(node); - //DBG - //printf("COPY TEMP : %ld -T> %ld \n", node->node_nr, cp->node_nr); - nodestate->link = cp; - if (is_Block(cp)) - cp->loop = NULL; - cpstate = XMALLOCZ(link_node_state_t); - cp->link = cpstate; - nodestate->temp=1; - set_irn_visited(cp, irg->visited); + if (get_irn_visited(node) >= get_irg_visited(irg)) { + /* Here we rely on nodestate's copy being initialized with NULL */ + DB((dbg, LEVEL_5, "copy_walk: We have already visited %N\n", node)); + if (node_info->copy == NULL) { + cp = rawcopy_node(node); + DB((dbg, LEVEL_5, "The TEMP copy of %N is created %N\n", node, cp)); } return; } - //printf(" ----- WALK %ld ----- \n", node->node_nr); - /** - * WALK - */ - set_irn_visited(node, irg->visited); + /* Walk */ + mark_irn_visited(node); - if ( !is_Block(node) ) { - ir_node *pred = get_irn_n(node, -1); - if (is_in_loop(pred)) - peel_walk(pred, NULL); + if (!is_Block(node)) { + ir_node *pred = get_nodes_block(node); + if (walk_condition(pred)) + DB((dbg, LEVEL_5, "walk block %N\n", pred)); + copy_walk(pred, walk_condition, set_loop); } arity = get_irn_arity(node); NEW_ARR_A(ir_node *, cpin, arity); - for (i = get_irn_arity(node) - 1; i >= 0; --i) { + for (i = 0; i < arity; ++i) { ir_node *pred = get_irn_n(node, i); - if (is_in_loop(pred)) - { - peel_walk(pred, NULL); - cpin[i] = get_lstate(pred)->link; - //printf("copy of %ld gets in %ld", node->node_nr, cpin[i]->node_nr); + if (walk_condition(pred)) { + DB((dbg, LEVEL_5, "walk node %N\n", pred)); + copy_walk(pred, walk_condition, set_loop); + cpin[i] = get_copy(pred); + DB((dbg, LEVEL_5, "copy of %N gets new in %N which is copy of %N\n", + node, get_copy(pred), pred)); } else { cpin[i] = pred; - } - //printf("copy of %ld gets in %ld \n", node->node_nr, cpin[i]->node_nr); } - /** - * copy node / finalize temp node - */ - if (!nodestate->temp) - { -// if (!is_Const(node) && !is_SymConst(node)) { - cp = exact_copy(node); - //DBG - //printf("COPY FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr); - nodestate->link = cp; - cpstate = XMALLOCZ(link_node_state_t); - cp->link = cpstate; - if (is_Block(cp)) - cp->loop = NULL; - set_irn_visited(cp, irg->visited); -// } else { -// cp = node; -// //DBG -// printf("CONST FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr); -// nodestate->link = cp; -// } + /* copy node / finalize temp node */ + if (node_info->copy == NULL) { + /* No temporary copy existent */ + cp = rawcopy_node(node); + DB((dbg, LEVEL_5, "The FINAL copy of %N is CREATED %N\n", node, cp)); } else { /* temporary copy is existent but without correct ins */ - cp = nodestate->link; - //printf("FINALIZE: %ld \n", cp->node_nr); + cp = get_copy(node); + DB((dbg, LEVEL_5, "The FINAL copy of %N is EXISTENT %N\n", node, cp)); } - //TODO REM - //add_End_keepalive(get_irg_end(current_ir_graph), cp ); + if (!is_Block(node)) { + ir_node *cpblock = get_copy(get_nodes_block(node)); - if (!is_Block(node)) - { - ir_node *cpblock = get_copy_of(get_nodes_block(node)); - - /* set the block of the copy to the copied block */ - //printf(" PRE NODE %ld BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr); set_nodes_block(cp, cpblock ); - //printf(" POST NODE %ld BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr); - - /* fix the phi information in attr.phis (does not add the phi node to the block) */ - if( is_Phi(cp) ) - { + if (is_Phi(cp)) add_Block_phi(cpblock, cp); - //printf("PHI-BLOCK block %ld got its phi %ld\n", cpblock->node_nr, cp->node_nr); - } } - else { - /* macroblock info is not copied */ - set_Block_MacroBlock(cp, cp); - } - - //dbg valid ins? -// for(i=0; inode_nr, cp->node_nr, cpin[i]->node_nr); + set_irn_loop(cp, set_loop); set_irn_in(cp, ARR_LEN(cpin), cpin); - -// for(i=0; i< ARR_LEN(cpin); i++) -// { -// printf("ins %ld: %ld \n", cp->node_nr, cpin[i]->node_nr); -// } - -//TODO REM -// if (!nodestate->temp) -// { -// nodestate->link = cp; -// cpstate = XMALLOCZ(link_node_state_t); -// cp->link = cpstate; -// } else { -// /* temporary copy is existent but without correct ins */ -// cp = nodestate->link; -// } - - - nodestate->temp = 0; - nodestate->cloned = 1; } -//void chklink (ir_node *n, void * e) -//{ -// ir_node *link = n->link; -// link_node_state_t *l = (link_node_state_t *)link; -// -// printf("n %ld\n", n->node_nr); -// printf("l p %ld\n", l->link); -// if (l->link) -// printf("l %ld\n", l->link->node_nr); -// -//} - /** * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds */ -void peel(void) +static void peel(out_edge *loop_outs) { int i; ir_node **entry_buffer; int entry_c = 0; - int entry_i; - NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_entries)); + ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); - for(i = 0; i < ARR_LEN(loop_entries); i++) - { - loop_entry_t entry = loop_entries[i]; + NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs)); + + /* duplicate loop walk */ + inc_irg_visited(current_ir_graph); + + for (i = 0; i < ARR_LEN(loop_outs); i++) { + out_edge entry = loop_outs[i]; ir_node *node = entry.node; ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); if (is_Block(node)) { - /* node is block and the given pred points inside the loop */ - ir_node *cppred; - - peel_walk( pred, 0); - - // leave keepalives out - if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) { - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); - } else { - cppred = get_copy_of(pred); - //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr); - add_pred( node, cppred ); - //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr); + copy_walk(pred, is_in_loop, NULL); + duplicate_preds(node, entry.pred_irn_n, get_copy(pred) ); + } else { + copy_walk(pred, is_in_loop, NULL); + /* leave out keepalives */ + if (!is_End(node)) { + /* Node is user of a value defined inside the loop. + * We'll need a phi since we duplicated the loop. */ + /* Cannot construct_ssa here, because it needs another walker. */ + entry_buffer[entry_c] = pred; + ++entry_c; } + } + } - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); + ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); - //DBG - //phifix(node, cppred); + /* Rewires the 2 heads */ + peel_fix_heads(); + + /* Generate phis for values from peeled code and original loop */ + construct_ssa_foreach(entry_buffer, entry_c); + /*for (i = 0; i < entry_c; i++) + { + ir_node *cppred, *block, *cpblock, *pred; + + pred = entry_buffer[i]; + cppred = get_copy(pred); + block = get_nodes_block(pred); + cpblock = get_nodes_block(cppred); + construct_ssa(block, pred, cpblock, cppred); + }*/ +} + +/** + * Populates head_entries with (node, pred_pos) tuple + * whereas the node's pred at pred_pos is in the head but not the node itself. + * Head and condition chain blocks must be marked. + */ +static void get_head_outs(ir_node *node, void *env) +{ + int i; + int arity = get_irn_arity(node); + (void) env; + + DB((dbg, LEVEL_5, "get head entries %N \n", node)); + + for (i = 0; i < arity; ++i) { + /* node is not in the head, but the predecessor is. + * (head or loop chain nodes are marked) */ + + DB((dbg, LEVEL_5, "... ")); + DB((dbg, LEVEL_5, "node %N marked %d (0) pred %d marked %d (1) \n", + node->node_nr, is_nodesblock_marked(node),i, is_nodesblock_marked(get_irn_n(node, i)))); + + if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) { + out_edge entry; + entry.node = node; + entry.pred_irn_n = i; + DB((dbg, LEVEL_5, + "Found head chain entry %N @%d because !inloop %N and inloop %N\n", + node, i, node, get_irn_n(node, i))); + ARR_APP1(out_edge, cur_head_outs, entry); + } + } +} + +/** + * Find condition chains, and add them to be inverted, until the node count exceeds the limit. + * A block belongs to the chain if a condition branches out of the loop. + * Returns 1 if the given block belongs to the condition chain. + */ +static unsigned find_condition_chains(ir_node *block) +{ + const ir_edge_t *edge; + unsigned mark = 0; + int nodes_n = 0; + + DB((dbg, LEVEL_5, "condition_chains for block %N\n", block)); + + /* Collect all outs, including keeps. + * (TODO firm function for number of out edges?) */ + foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) { + ++nodes_n; + } + + /* We do not want to collect more nodes from condition chains, than the limit allows us to. + * Also, leave at least one block as body. */ + if (head_inversion_node_count + nodes_n > inversion_head_node_limit + || head_inversion_block_count + 1 == loop_info.blocks) { + set_Block_mark(block, 0); + + return 0; + } + + /* First: check our successors, and add all succs that are outside of the loop to the list */ + foreach_block_succ(block, edge) { + ir_node *src = get_edge_src_irn( edge ); + int pos = get_edge_src_pos( edge ); + + if (!is_in_loop(src)) { + out_edge entry; + + mark = 1; + entry.node = src; + entry.pred_irn_n = pos; + ARR_APP1(out_edge, cond_chain_entries, entry); + mark_irn_visited(src); + } + } + + if (mark == 0) { + /* this block is not part of the chain, + * because the chain would become too long or we have no successor outside of the loop */ + + set_Block_mark(block, 0); + return 0; + } else { + set_Block_mark(block, 1); + ++head_inversion_block_count; + DB((dbg, LEVEL_5, "block %N is part of condition chain\n", block)); + head_inversion_node_count += nodes_n; + } + + /* Second: walk all successors, and add them to the list if they are not part of the chain */ + foreach_block_succ(block, edge) { + unsigned inchain; + ir_node *src = get_edge_src_irn( edge ); + int pos = get_edge_src_pos( edge ); + + /* already done cases */ + if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) { + continue; + } + + mark_irn_visited(src); + DB((dbg, LEVEL_5, "condition chain walk %N\n", src)); + inchain = find_condition_chains(src); + + /* if successor is not part of chain we need to collect its outs */ + if (!inchain) { + out_edge entry; + entry.node = src; + entry.pred_irn_n = pos; + ARR_APP1(out_edge, cond_chain_entries, entry); + } + } + return mark; +} + +/** + * Rewire the loop head and inverted head for loop inversion. + */ +static void inversion_fix_heads(void) +{ + ir_node **loopheadnins, **invheadnins; + ir_node *loophead = loop_cf_head; + ir_node *invhead = get_copy(loophead); + + int headarity = get_irn_arity(loophead); + ir_node *phi; + int i; + + int lheadin_c = 0; + int iheadin_c = 0; + + int backedges_n = get_backedge_n(loophead, 0); + int lhead_arity = backedges_n; + int ihead_arity = headarity - backedges_n; + + assert(lhead_arity != 0 && "Loophead has arity 0. Probably wrong backedge informations."); + assert(ihead_arity != 0 && "Inversionhead has arity 0. Probably wrong backedge informations."); + + /* new in arrays for all phis in the head blocks */ + NEW_ARR_A(ir_node *, loopheadnins, lhead_arity); + NEW_ARR_A(ir_node *, invheadnins, ihead_arity); + + for_each_phi(loophead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity); + } + for_each_phi(invhead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity); + } + + for (i = 0; i < headarity; i++) { + ir_node *pred = get_irn_n(loophead, i); + + /** + * Rewire the head blocks ins and their phi ins. + * Requires phi list per block. + */ + if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) { + /* just copy these edges */ + loopheadnins[lheadin_c] = pred; + for_each_phi(loophead, phi) { + get_node_info(phi)->ins[lheadin_c] = get_irn_n(phi, i); + } + ++lheadin_c; } else { - /* node is somewhere in the graph, outside of the loop */ - //ir_node *cppred; - //ir_node *block; - //ir_node *cpblock; - peel_walk( pred, 0); - - // no ssa for keepalives - if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) { - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); - } else { - //printf("fix entry %ld to %ld\n", node->node_nr, pred->node_nr); - entry_buffer[entry_c++] = pred; + invheadnins[iheadin_c] = pred; + for_each_phi(invhead, phi) { + get_node_info(phi)->ins[iheadin_c] = get_irn_n(phi, i) ; } + ++iheadin_c; + } + } - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); + /* assign the ins to the head blocks */ + set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins); + set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins); - // cannot construct_ssa here, because it needs another walker + /* assign the ins for the phis */ + for_each_phi(loophead, phi) { + ir_node **ins = get_node_info(phi)->ins; + set_irn_in(phi, lhead_arity, ins); + } + + for_each_phi(invhead, phi) { + ir_node **ins = get_node_info(phi)->ins; + set_irn_in(phi, ihead_arity, ins); + } +} + +static void inversion_walk(out_edge *head_entries) +{ + int i; + ir_node *phi; + int entry_c = 0; + ir_node **entry_buffer; + ir_node **head_phi_assign; - } /* is block */ - } /* for */ + NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries)); - //irg_walk_graph(current_ir_graph, chklink, NULL, NULL); + head_phi_assign = NEW_ARR_F(ir_node *, 0); - fix_head(loop_cf_head); + /* Find assignments in the condition chain, + * to construct_ssa for them after the loop inversion. */ + for_each_phi(loop_cf_head , phi) { + int arity = get_irn_arity(phi); + for (i = 0; i < arity; ++i) { + ir_node *def = get_irn_n(phi, i); + if (is_nodesblock_marked(def)) { + ARR_APP1(ir_node *, head_phi_assign, def); + } + } + } - //printf (" FIXHEAD DONE :D \n"); + ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); - entry_i = 0; + /** + * duplicate condition chain + **/ + inc_irg_visited(current_ir_graph); - /* Generate phis for values from peeled code and original loop */ - for(i = 0; entry_i < entry_c; i++) - { - loop_entry_t entry = loop_entries[i]; + for (i = 0; i < ARR_LEN(head_entries); ++i) { + out_edge entry = head_entries[i]; ir_node *node = entry.node; + ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); - if (is_Block(node)) - { - /* block */ - ir_node *phi=get_Block_phis(node); + if (is_Block(node)) { + DB((dbg, LEVEL_5, "\nInit walk block %N\n", pred)); - while(phi) - { - add_pred(phi, entry_buffer[entry_i++]); - phi=get_Phi_next(phi); - } + copy_walk(pred, is_nodesblock_marked, cur_loop); + duplicate_preds(node, entry.pred_irn_n, get_copy(pred) ); } else { - /* not block */ + DB((dbg, LEVEL_5, "\nInit walk node %N\n", pred)); - ir_node *cppred, *block, *cpblock, *pred; + copy_walk(pred, is_nodesblock_marked, cur_loop); - /** - * pred = get_irn_n(entry.node, entry.pred_irn_n); - * does not work, because we could have changed the nodes preds in construct_ssa - */ + /* ignore keepalives */ + if (!is_End(node)) { + /* Node is user of a value assigned inside the loop. + * We will need a phi since we duplicated the head. */ + entry_buffer[entry_c] = pred; + ++entry_c; + } + } + } + + ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); + + inversion_fix_heads(); + + /* Generate phis for users of values assigned in the condition chain + * and read in the loops body */ + construct_ssa_foreach(entry_buffer, entry_c); + + /* Generate phis for values that are assigned in the condition chain + * but not read in the loops body. */ + construct_ssa_foreach(head_phi_assign, ARR_LEN(head_phi_assign)); + + loop_cf_head = get_copy(loop_cf_head); +} + +/* Loop peeling */ +static void loop_peeling(void) +{ + cur_loop_outs = NEW_ARR_F(out_edge, 0); + irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL ); + + peel(cur_loop_outs); + + /* clean up */ + reset_node_infos(); + + set_irg_doms_inconsistent(current_ir_graph); + set_irg_loopinfo_inconsistent(current_ir_graph); + set_irg_outs_inconsistent(current_ir_graph); + + DEL_ARR_F(cur_loop_outs); +} + +/* Loop inversion */ +static void loop_inversion(void) +{ + unsigned do_inversion = 1; + + inversion_head_node_limit = INT_MAX; + + /* Search for condition chains. */ + ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); + + irg_walk_graph(current_ir_graph, reset_block_mark, NULL, NULL); + + loop_info.blocks = get_loop_n_blocks(cur_loop); + cond_chain_entries = NEW_ARR_F(out_edge, 0); + + head_inversion_node_count = 0; + head_inversion_block_count = 0; + + set_Block_mark(loop_cf_head, 1); + mark_irn_visited(loop_cf_head); + inc_irg_visited(current_ir_graph); + + find_condition_chains(loop_cf_head); + + DB((dbg, LEVEL_3, "Loop contains %d blocks.\n", loop_info.blocks)); + if (loop_info.blocks < 2) { + do_inversion = 0; + DB((dbg, LEVEL_3, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks)); + } + + /* We also catch endless loops here, + * because they do not have a condition chain. */ + if (head_inversion_block_count < 1) { + do_inversion = 0; + DB((dbg, LEVEL_3, "Loop contains %d (less than 1) invertible blocks => No Inversion done.\n", head_inversion_block_count)); + } + + if (do_inversion) { + cur_head_outs = NEW_ARR_F(out_edge, 0); + + /* Get all edges pointing into the head or condition chain (outs). */ + irg_walk_graph(current_ir_graph, get_head_outs, NULL, NULL); + inversion_walk(cur_head_outs); + + DEL_ARR_F(cur_head_outs); + + set_irg_doms_inconsistent(current_ir_graph); + set_irg_loopinfo_inconsistent(current_ir_graph); + set_irg_outs_inconsistent(current_ir_graph); + } + + /* free */ + DEL_ARR_F(cond_chain_entries); + ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); +} + +/** + * Returns last element of linked list of copies by + * walking the linked list. + */ +static ir_node *get_last_copy(ir_node *node) +{ + ir_node *copy, *cur; + cur = node; + while ((copy = get_copy(cur))) { + cur = copy; + } + return cur; +} + +/** + * Rewire floating copies of the current loop. + */ +static void unrolling_fix_cf(void) +{ + ir_node *loophead = loop_cf_head; + int headarity = get_irn_arity(loophead); + ir_node *phi, *headnode; + /*ir_node *high, *low;*/ + int i; + + int uhead_in_n = 0; + int backedges_n = get_backedge_n(loophead, 0); + int unroll_arity = backedges_n; - pred = entry_buffer[entry_i++]; + /* Create ins for all heads and their phis */ + headnode = get_copy(loophead); + for_each_copy(headnode) { + NEW_ARR_A(ir_node *, get_node_info(headnode)->ins, unroll_arity); + for_each_phi(headnode, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, unroll_arity); + } + } - //printf("pred %ld\n", pred->node_nr); - cppred = get_copy_of(pred); - //printf("cppred %ld\n", cppred->node_nr); - block = get_nodes_block(pred); - //printf("block %ld\n", block->node_nr); - cpblock = get_nodes_block(cppred); - //printf("cpblock %ld\n", cpblock->node_nr); + /* Append the copies to the existing loop. */ + for (i = 0; i < headarity; i++) { + ir_node *upper_head = loophead; + ir_node *lower_head = get_copy(loophead); + ir_node *upper_pred = get_irn_n(loophead, i); + ir_node *lower_pred = get_copy(get_irn_n(loophead, i)); - //dump_ir_block_graph(current_ir_graph, "vorher"); - construct_ssa(block, pred, cpblock, cppred); - //add_End_keepalive(get_irg_end(current_ir_graph), cppred); + ir_node *last_pred; + + /** + * Build unrolled loop top down + */ + if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) { + for_each_copy2(upper_head, lower_head, upper_pred, lower_pred) { + get_node_info(lower_head)->ins[uhead_in_n] = upper_pred; + for_each_phi(upper_head, phi) { + ir_node *phi_copy = get_copy(phi); + get_node_info(phi_copy)->ins[uhead_in_n] = get_irn_n(phi, i); + } + } - //add_pred(get_irg_end(current_ir_graph), cppred); - //dump_ir_block_graph(current_ir_graph, "nachher"); + last_pred = upper_pred; + ++uhead_in_n; + /* Fix the topmost loop heads backedges. */ + set_irn_n(loophead, i, last_pred); + for_each_phi(loophead, phi) { + ir_node *last_phi = get_last_copy(phi); + ir_node *pred = get_irn_n(last_phi, i); + set_irn_n(phi, i, pred); + } + } + } + headnode = get_copy(loophead); + for_each_copy(headnode) { + set_irn_in(headnode, unroll_arity, get_node_info(headnode)->ins); + for_each_phi(headnode, phi) { + set_irn_in(phi, unroll_arity, get_node_info(phi)->ins); } } } -void decision_maker(void) +#if 0 +static ir_node *add_phi(ir_node *node, int phi_pos) { - //inc_irg_visited(current_ir_graph); - //loop_walker( loop_entries, NULL, get_invariants, NULL ); + ir_mode *mode; + ir_node *phi; + ir_node **in; + mode = get_irn_mode(get_irn_n(node, phi_pos)); + ir_node *block = get_nodes_block(node); + int n_cfgpreds = get_irn_arity(block); + ir_node *pred = get_irn_n(node, phi_pos); + int i; - inc_irg_visited(current_ir_graph); - peel(); + /* create a new Phi */ + NEW_ARR_A(ir_node*, in, n_cfgpreds); + for (i = 0; i < n_cfgpreds; ++i) + in[i] = new_Unknown(mode); /*pred;*/ + + phi = new_r_Phi(block, n_cfgpreds, in, mode); + + assert(phi && "phi null"); + assert(is_Bad(phi) && "phi bad"); + /* Important: always keep block phi list up to date. */ + add_Block_phi(block, phi); + /* EVERY node is assumed to have a node_info linked. */ + alloc_node_info(phi, NULL); + + set_irn_n(node, phi_pos, phi); + return phi; } +#endif /** - * TODO use list , not arr_F + * Loop unrolling + * Could be improved with variable range informations. */ -void analyze_loop(ir_loop *loop) +static void loop_unrolling(void) { - /* Init new for every loop */ - loop_cf_head = NULL; - loop_cf_head_valid = 1; - has_sto = 0; + int i, j; + + unroll_times = 7; + + cur_loop_outs = NEW_ARR_F(out_edge, 0); + irg_walk_graph( current_ir_graph, get_loop_outs, NULL, NULL ); + + ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); + + /* duplicate whole loop content */ + inc_irg_visited(current_ir_graph); + + for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) { + out_edge entry = cur_loop_outs[i]; + ir_node *node = entry.node; + ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); + if (!is_Block(node)) { + for (j = 0; j < unroll_times - 1; ++j) { + copy_walk(pred, is_in_loop, cur_loop); + pred = get_copy(pred); + } + } + } + + for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) { + out_edge entry = cur_loop_outs[i]; + ir_node *node = entry.node; + ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); + + if (is_Block(node)) { + for (j = 0; j < unroll_times - 1; ++j) { + copy_walk(pred, is_in_loop, cur_loop); + duplicate_preds(node, entry.pred_irn_n, get_copy(pred)); + + pred = get_copy(pred); + } + } + } + + ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); + + /*dump_ir_graph(current_ir_graph, "-raw");*/ + /* LDBG 2 */ +#if 1 + /* Line up the floating copies. */ + unrolling_fix_cf(); + + /* Generate phis for all loop outs */ + for (i = 0; i < ARR_LEN(cur_loop_outs); ++i) { + out_edge entry = cur_loop_outs[i]; + ir_node *node = entry.node; + ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); + + if (!is_Block(node) && !is_End(node)) { + DB((dbg, LEVEL_5, " construct_ssa_n def %N node %N pos %d\n", + pred, node, entry.pred_irn_n)); + construct_ssa_n(pred, node); + } + } +#endif + + DEL_ARR_F(cur_loop_outs); + + set_irg_doms_inconsistent(current_ir_graph); + set_irg_loopinfo_inconsistent(current_ir_graph); + set_irg_outs_inconsistent(current_ir_graph); +} + +/* Initialization and */ +static void init_analyze(ir_loop *loop) +{ + /* Init new for every loop */ cur_loop = loop; - /* arrays */ - backedges = NEW_ARR_F(loop_entry_t, 0); - alien_backedges = NEW_ARR_F(loop_entry_t, 0); - loop_entries = NEW_ARR_F(loop_entry_t, 0); - head_edges = NEW_ARR_F(loop_entry_t, 0); + loop_cf_head = NULL; + loop_cf_head_valid = 1; + loop_inv_head = NULL; + loop_peeled_head = NULL; - inc_irg_visited( current_ir_graph ); - irg_walk_graph( current_ir_graph, block_phi_walker, NULL, NULL ); + loop_info.outs = 0; + loop_info.calls = 0; + loop_info.loads = 0; + loop_info.blocks = 0; - /* Collect all backedges */ - for_each_loop_block(loop, collect_backedges, NULL ); + DB((dbg, LEVEL_2, " >>>> current loop includes node %N <<<\n", get_loop_node(loop, 0))); - /* Find loop entries walk, find head */ - inc_irg_visited( current_ir_graph ); - irg_walk_graph( current_ir_graph, find_loop_entries_walk, NULL, NULL ); + irg_walk_graph(current_ir_graph, get_loop_info, NULL, NULL); /* RETURN if there is no valid head */ - if (!loop_cf_head || !loop_cf_head_valid) - { - //DBG printf("NOTE: There is no valid loop head. Nothing done.\n"); + if (!loop_cf_head || !loop_cf_head_valid) { + DB((dbg, LEVEL_2, "No valid loop head. Nothing done.\n")); return; } - decision_maker(); + if (enable_peeling) + loop_peeling(); - // TODO free all link states... or better put them on functionstack + if (enable_inversion) + loop_inversion(); + if (enable_unrolling) + loop_unrolling(); - /* FREE */ - DEL_ARR_F(loop_entries); - DEL_ARR_F(backedges); - DEL_ARR_F(alien_backedges); - DEL_ARR_F(head_edges); +#if 0 + /* RETURN if there is a call in the loop */ + if (loop_info.calls) + return; +#endif - //dump_ir_block_graph(current_ir_graph, "-lu1"); + DB((dbg, LEVEL_2, " <<<< end of loop with node %N >>>>\n", get_loop_node(loop, 0))); } -/** - * Find most inner loops and send them to analyze_loop - */ -void analyze_inner_loop(ir_loop *loop) +/* Find most inner loops and send them to analyze_loop */ +static void find_most_inner_loop(ir_loop *loop) { /* descend into sons */ int sons = get_loop_n_sons(loop); - //printf("found %d loops \n", sons); + if (sons == 0) { + loop_element elem; + int el_n, i; - if (sons==0) - { - //printf("analyze loop %ld\n", loop->loop_nr); - analyze_loop(loop); - } - else - { + el_n = get_loop_n_elements(loop); + + for (i=0; i < el_n; ++i) { + elem = get_loop_element(loop, i); + /* We can only rely on the blocks, + * as the loop attribute of the nodes seems not to be set. */ + if (is_ir_node(elem.kind) && is_Block(elem.node)) { + ARR_APP1(ir_node *, loops, elem.node); + DB((dbg, LEVEL_5, "Found most inner loop (contains block %+F)\n", elem.node)); + break; + } + } + } else { int s; - for(s=0; sloop_nr); - analyze_inner_loop( get_loop_son(loop, s) ); + for (s=0; snode_nr); -// phi = NULL; -// } else { -// phi=get_Phi_next(phi); -// } -// } -//} - -void loop_unroll(ir_graph *irg) +/** + * Assure preconditions are met and go through all loops. + */ +void loop_optimization(ir_graph *irg) { - //printf(" --- loop unroll start --- \n"); + ir_loop *loop; + int i, sons, nr; - //irg_walk_graph(irg, phicheck, NULL, NULL); + /* Init */ + link_node_state_list = NULL; + set_current_ir_graph(irg); - ir_loop *loop; + /* preconditions */ + edges_assure(irg); + assure_irg_outs(irg); + /* NOTE: sets only the loop attribute of blocks, not nodes */ + /* NOTE: Kills links */ assure_cf_loop(irg); + ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST); + collect_phiprojs(irg); + ir_free_resources(irg, IR_RESOURCE_IRN_LINK); + + /* allocate node_info for additional information on nodes */ + ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); + irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL); + loop = get_irg_loop(irg); - int sons = get_loop_n_sons(loop); - //printf("FOUND %d LOOPS \n", sons); - int nr; - for (nr=0; nr>> unrolling (Startnode %N) <<<\n", + get_irg_start(irg))); + + loop_optimization(irg); + + DB((dbg, LEVEL_2, " >>> unrolling done (Startnode %N) <<<\n", + get_irg_start(irg))); +} + +void do_loop_inversion(ir_graph *irg) +{ + enable_unrolling = 0; + enable_peeling = 0; + enable_inversion = 1; + + DB((dbg, LEVEL_2, " >>> inversion (Startnode %N) <<<\n", + get_irg_start(irg))); + + loop_optimization(irg); - (void)context; - loop_unroll(irg); - return 0; + DB((dbg, LEVEL_2, " >>> inversion done (Startnode %N) <<<\n", + get_irg_start(irg))); +} + +void do_loop_peeling(ir_graph *irg) +{ + enable_unrolling = 0; + enable_peeling = 1; + enable_inversion = 0; + + DB((dbg, LEVEL_2, " >>> peeling (Startnode %N) <<<\n", + get_irg_start(irg))); + + loop_optimization(irg); + + DB((dbg, LEVEL_2, " >>> peeling done (Startnode %N) <<<\n", + get_irg_start(irg))); + +} + +ir_graph_pass_t *loop_inversion_pass(const char *name) +{ + return def_graph_pass(name ? name : "loop_inversion", do_loop_inversion); } -//TODO ?? ir_graph_pass_t *loop_unroll_pass(const char *name) { - struct loop_unroll_pass_t *pass = - XMALLOCZ(struct loop_unroll_pass_t); + return def_graph_pass(name ? name : "loop_unroll", do_loop_unrolling); +} - return def_graph_pass_constructor( - &pass->pass, name ? name : "loop_unroll", - loop_unroll_wrapper); +ir_graph_pass_t *loop_peeling_pass(const char *name) +{ + return def_graph_pass(name ? name : "loop_peeling", do_loop_peeling); } -/* -void firm_init_loopunroll(void) { - FIRM_DBG_REGISTER(dbg, "firm.opt.loopunroll"); -}*/ +void firm_init_loop_opt(void) +{ + FIRM_DBG_REGISTER(dbg, "firm.opt.loop"); +}