From: Christian Helmer Date: Mon, 14 Dec 2009 15:32:53 +0000 (+0000) Subject: Loop peeling and loop inversion functioning but still errors in combination. X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=2cadf15bc35d3b26b94b7c5f2b972e95a23a7be0;p=libfirm Loop peeling and loop inversion functioning but still errors in combination. [r26792] --- diff --git a/ir/opt/loop.c b/ir/opt/loop.c index e6e06ddd6..c130a9f45 100644 --- a/ir/opt/loop.c +++ b/ir/opt/loop.c @@ -23,306 +23,250 @@ * @author Christian Helmer * @version $Id$ */ - -//#include "config.h" - -//#include -#include +#include "config.h" #include "irnode.h" -#include "irnode_t.h" -#include "irgraph_t.h" -//#include "irprog_t.h" +#include "debug.h" -//#include "iroptimize.h" -#include "ircons_t.h" -#include "iropt_t.h" +#include "ircons.h" #include "irgopt.h" -//#include "irgmod.h" +#include "irgmod.h" #include "irgwalk.h" - -//#include "array_t.h" -#include "list.h" -//#include "pset.h" -//#include "pmap.h" -//#include "pdeq.h" -//#include "xmalloc.h" -//#include "pqueue.h" - #include "irouts.h" -#include "irloop_t.h" -#include "irbackedge_t.h" -//#include "opt_inline_t.h" -//#include "cgana.h" -//#include "trouts.h" -//#include "error.h" - -//#include "analyze_irg_args.h" -#include "iredges_t.h" -//#include "irflag_t.h" -//#include "irhooks.h" +#include "iredges.h" #include "irtools.h" -//#include "iropt_dbg.h" -#include "irpass_t.h" -#include "irloop.h" +#include "array_t.h" /* automatic array */ +#include "beutil.h" /* get_block */ -#include "array_t.h" +// TODO during DBG +//#include "irnode_t.h" #include "irdump.h" -/* convenience macro for iterating over every phi node of the given block */ +DEBUG_ONLY(static firm_dbg_module_t *dbg); + +/** + * Convenience macro for iterating over every phi node of the given block. + * Requires phi list per block. + */ #define for_each_phi(block, phi) \ for ( (phi) = get_Block_phis( (block) ); (phi) ; (phi) = get_Phi_next( (phi) ) ) /* current loop */ -ir_loop *cur_loop; +static ir_loop *cur_loop; /* The loop walker should be possible to abort if nothing can be done anymore */ typedef unsigned irg_walk_func_abortable(ir_node *, void *); -/* stores pair of node and number for the nodes predecessor */ -typedef struct loop_entry_t { - ir_node *node; /* node outside of the loop */ - int pred_irn_n; /* with pred_irn_n pointing inside loop */ - //loop_entry_t *next; -} loop_entry_t; +/* condition for breaking a copy_walk */ +typedef unsigned walker_condition(ir_node *); -//loop_entry_t loop_entry_list; +/* stores node and position of a predecessor */ +typedef struct out_edges { + ir_node *node; + int pred_irn_n; +} out_edges; -/* Store complex values in the nodes link */ -typedef struct link_node_state_t { - unsigned cloned:1; - unsigned temp:1; /* < Node is temporarily copied, to resolve cycles */ +/* access complex values through the nodes links */ +typedef struct node_info { unsigned invariant:1; ir_node *copy; - ir_node *link; /*< temporary links for ssa creation */ - ir_node **ins; /* ins for phi nodes, during rewiring of blocks */ -} link_node_state_t; - - -loop_entry_t *loop_entries; /* loop entries (from below) in the node graph */ -//int loop_entries_n; -loop_entry_t *head_entries; /* loop entries (from below) in the node graph */ -int backedges_n; -//loop_entry_t *backedges; /* backedges exclusively from the current loop */ -//loop_entry_t *alien_backedges; /* The head can be head of several loops. */ -//loop_entry_t *head_edges; /* The head can be head of several loops. */ - -ir_node *loop_cf_head = NULL; /* loop exit in the node graph */ -unsigned loop_cf_head_valid = 1; /* a loop may/must have one head, otherwise invalid */ - -unsigned has_sto; /* If we store inside the loop we might - * have disambiguation problems */ -//DBG -//void arrdump(ir_node **arr) -//{ -// int i; -// for (i=0; inode_nr), is_Block(arr[i])); -// } -//} + ir_node *link; /* temporary links for ssa creation */ + ir_node **ins; /* ins for phi nodes, during rewiring of blocks */ + struct node_info *freelistnext; /* linked list to free all node_infos */ +} node_info; + +static node_info *link_node_state_list; /* head of the linked list to free all node_infos */ + +static out_edges *cur_loop_outs; /* A walker may start visiting the current loop with these nodes. */ +static out_edges *cur_head_outs; /* A walker may start visiting the cur head with these nodes. */ + +static ir_node *loop_cf_head = NULL; /* Loop head node */ +static unsigned loop_cf_head_valid = 1; /* A loop may have one head, otherwise we do not touch it. */ + +/* Inverted head */ +static ir_node *loop_inv_head = NULL; +/* Peeled head */ +static ir_node *loop_peeled_head = NULL; + +/* Loop analysis informations */ +typedef struct loop_info_t { + unsigned calls; + unsigned loads; + unsigned invariant_loads; /* number of load nodes */ + unsigned stores; /* number of store nodes */ + unsigned blocks; /* number of blocks in the loop */ + unsigned opnodes_n; /* nodes that should result in an instruction */ + unsigned opnodes_head; +} loop_info_t; + +/* Information about the current loop */ +static loop_info_t loop_info; + +/* A walker may start visiting a condition chain with these nodes. */ +static out_edges *cond_chain_entries; + +static unsigned head_inversion_node_count; +static unsigned head_inversion_node_limit; +//static unsigned head_inversion_block_count; /** - * Returns the state of the given node. + * + * ============= AUXILIARY FUNCTIONS ===================================== */ -link_node_state_t *get_lstate(ir_node *n) -{ - return ((link_node_state_t *)n->link); -} /** - * Returns the link inside of the nodes state which is pointing to its copy - * most of the time during loop peeling. + * Creates object on the heap, and adds it to a linked list to free it later. */ -ir_node *get_copy(ir_node *n) -{ - return ((link_node_state_t *)n->link)->copy; +static node_info *new_node_info(void) { + node_info *l = XMALLOCZ(node_info); + l->freelistnext = link_node_state_list; + link_node_state_list = l; + l->copy = NULL; + l->invariant = 0; + return l; } -/** - * Sets the nodes copy information - */ -void set_copy(ir_node *n, ir_node *copy) +static node_info *get_node_info(ir_node *n) { - ((link_node_state_t *)n->link)->copy = copy; + return ((node_info *)get_irn_link(n)); } -/** - * Returns true if the node or block is in cur_loop. - */ -unsigned is_in_loop(ir_node *node) +/* Allocates a node_info struct for the given node. For use with a walker. */ +static void alloc_node_info(ir_node *node, void *env) { -// if (is_Block(node)) { -// if (node->loop == cur_loop) { -// printf(" INLOOP %ld \n", node->node_nr); -// } -// return (node->loop == cur_loop); -// } else { -// if ( get_nodes_block(node)->loop == cur_loop ) { -// printf(" INLOOP %ld \n", node->node_nr); -// } -// return ( get_nodes_block(node)->loop == cur_loop ); -// } - if (is_Block(node)) { - return (node->loop == cur_loop); - } else { - return ( get_nodes_block(node)->loop == cur_loop ); - } + node_info *state = new_node_info(); + (void) env; + set_irn_link(node, (void *)state); } -unsigned is_in_head(ir_node *node) +static void free_node_info(void) { - if (is_Block(node)) { - return (node == loop_cf_head); - } else { - return ( get_nodes_block(node) == loop_cf_head ); + node_info *next; + next = link_node_state_list; + while(next->freelistnext) { + node_info *cur = next; + next = cur->freelistnext; + xfree( cur ); } } /** - * Returns if the given be is an alien edge + * Use the linked list to reset the reused values of all node_infos + * Reset in particular the copy attribute as copy_walk uses it to determine a present copy */ -unsigned is_alien_edge(ir_node *n, int i) +static void reset_node_infos(void) { - return( !is_in_loop( get_irn_n( n, i ) ) ); + node_info *next; + next = link_node_state_list; + while(next->freelistnext) { + node_info *cur = next; + next = cur->freelistnext; + cur->copy = NULL; + cur->ins = NULL; + cur->link = NULL; + } } -static void add_pred(ir_node* node, ir_node* x) +/* Returns the */ +static ir_node *get_copy(ir_node *n) { - ir_node** ins; - int n; - int i; - -// if(!node) -// printf("NONODE\n"); + return ((node_info *)get_irn_link(n))->copy; +} - //printf("addpred %ld pred %ld \n", node->node_nr, x->node_nr); +/* Links the node to its copy */ +static void set_copy(ir_node *n, ir_node *copy) +{ + ((node_info *)get_irn_link(n) )->copy = copy; +} - // WHY limit it to blocks and phi? - assert(is_Block(node) || is_Phi(node)); +/* Returns 0 if the node or block is not in cur_loop */ +static unsigned is_in_loop(ir_node *node) +{ + return (get_irn_loop(get_block(node)) == cur_loop); +} - n = get_irn_arity(node); - NEW_ARR_A(ir_node*, ins, n + 1); - for (i = 0; i < n; i++) - ins[i] = get_irn_n(node, i); - ins[n] = x; - set_irn_in(node, n + 1, ins); +/* Returns if the given be is an alien edge. This is the case when the pred is not in the loop. */ +static unsigned is_alien_edge(ir_node *n, int i) +{ + return(!is_in_loop(get_irn_n(n, i))); } -void block_phi_walker(ir_node *n, void *env) +/* used for walker */ +static void unmark_block(ir_node *node, void * env) { - const ir_edge_t *edge; (void) env; - - /* RETURN */ - if (!is_Block(n)) - return; - - /* generate phi list for every block */ - n->attr.block.phis = NULL; - - foreach_out_edge(n, edge) { - ir_node *src = get_edge_src_irn(edge); - if (is_Phi(src)) - { - //printf("%ld has phi %ld \n", block->node_nr, src->node_nr); - add_Block_phi(n, src); - } - } +// DB((dbg, LEVEL_1, "UNMARK %ld\n", node->node_nr)); + if(is_Block(node)) + set_Block_mark(node, 0); } -/** - * Calls func() for every block in the given loop. - */ -void for_each_loop_block(ir_loop *loop, irg_walk_func *func, void *env) +static unsigned is_nodesblock_marked(ir_node* node) { - int elements, e; - elements = get_loop_n_elements(loop); - - for(e=0; enode_nr); - func(elem.node, env); - } - } + return (get_Block_mark(get_block(node))); } /** - * collects the blocks backedges and creates the phi list for every block + * Add newpred at position pos to node and also add the corresponding value to the phis. + * Requires block phi list. */ -void collect_backedges(ir_node *block, void *env) +static void duplicate_preds(ir_node* node, unsigned pos, ir_node* newpred) { - (void) env; + ir_node** ins; + ir_node *phi; + int block_arity; + int i; - //printf("LOOP BLOCK %ld\n", block->node_nr); + assert(is_Block(node) && "duplicate_preds is only allowed for blocks"); - /* collect backedges */ - if (has_backedges(block)) - { - int i; - int arity = get_irn_arity(block); + DB((dbg, LEVEL_5, "duplicate_preds(node %ld, pos %d, newpred %ld)\n", get_irn_node_nr(node), pos, get_irn_node_nr(newpred))); - for(i = 0; i < arity; ++i) { - ir_node *pred = get_irn_n(block, i); + block_arity = get_irn_arity(node); - loop_entry_t be; - be.node = block; - be.pred_irn_n = i; + NEW_ARR_A(ir_node*, ins, block_arity + 1 ); + for (i = 0; i < block_arity; ++i) + ins[i] = get_irn_n(node, i); + ins[block_arity] = newpred; - //ARR_APP1(loop_entry_t, head_edges, be); + set_irn_in(node, block_arity + 1, ins); - if (is_backedge(block, i) ) - { - if ( is_in_loop(pred) ) { - //printf("be: %ld --> %ld \n", block->node_nr, pred->node_nr); - //ARR_APP1(loop_entry_t, backedges, be); - ++backedges_n; - } -// else { -// //printf("alien be: %ld --> %ld \n", block->node_nr, pred->node_nr); -// ARR_APP1(loop_entry_t, alien_backedges, be); -// } - } -// else { -// if ( !is_in_loop(pred) ) { -// ARR_APP1(loop_entry_t, head_edges, be); -// } + for_each_phi(node, phi) { + int phi_arity = get_irn_arity(phi); + DB((dbg, LEVEL_5, "duplicate_preds: fixing phi %ld\n", get_irn_node_nr(phi))); + NEW_ARR_A(ir_node *, ins, block_arity + 1); + for(i=0; i < phi_arity; ++i) { + DB((dbg, LEVEL_5, "in %ld\n", get_irn_node_nr(get_irn_n(phi, i)))); + ins[i] = get_irn_n(phi, i); } + ins[block_arity] = get_irn_n(phi, pos); + set_irn_in(phi, block_arity + 1, ins); } } -/** - * Walks through all loop nodes. - */ -unsigned loop_walker_rec(ir_node *node, +/* Walks through all nodes of cur_loop */ +static unsigned loop_walker_rec(ir_node *node, irg_walk_func_abortable *pre, irg_walk_func_abortable *post, void * env) { int i; unsigned stop = 0; - - ir_graph *irg = current_ir_graph; + ir_graph *irg = get_current_ir_graph(); /* RETURN if we walked out of the loop*/ if (!is_in_loop(node)) return 0; - if (pre) - { + if (pre) { unsigned stop = pre(node, env); if (stop) return stop; } - set_irn_visited(node, irg->visited); + set_irn_visited(node, get_irg_visited(irg)); - if (node->op != op_Block) { + if (get_irn_op(node) != op_Block) { ir_node *pred = get_irn_n(node, -1); - if (pred->visited < irg->visited) + if (get_irn_visited(pred) < get_irg_visited(irg)) { stop = loop_walker_rec(pred, pre, post, env); if (stop) @@ -332,8 +276,7 @@ unsigned loop_walker_rec(ir_node *node, for (i = get_irn_arity(node) - 1; i >= 0; --i) { ir_node *pred = get_irn_n(node, i); - if (pred->visited < irg->visited) - { + if (get_irn_visited(pred) < get_irg_visited(irg)) { stop = loop_walker_rec(pred, pre, post, env); if (stop) return stop; @@ -349,16 +292,15 @@ unsigned loop_walker_rec(ir_node *node, * Walks through loop nodes. * The entries of the loop (all edges pointing into the loop) have to be given. */ -unsigned loop_walker(loop_entry_t *entries, +static unsigned loop_walker(out_edges *entries, irg_walk_func_abortable *pre, irg_walk_func_abortable *post, void * env) { int i; int stop = 0; - for (i=0; !stop && i inloop %ld (@ %d) \n", -// node->node_nr, pred->node_nr, i); +/** + * Finds invariant loads and marks them as invariant. + * (has to be post walk) + */ +static unsigned get_invariants(ir_node *node, void *env) +{ + unsigned invar = 1; + int arity = get_irn_arity(node); + (void) env; - ARR_APP1(loop_entry_t, loop_entries, entry); + /* RETURN, no preds to visit */ + if (arity == 0) return 0; + + if (is_Load(node)) { + assert(arity>=2 && "expected load node to have in[1] (address)"); + + ir_node *pred = get_irn_n(node, 1); + if ( (get_Load_volatility(node) == volatility_non_volatile) & + (!is_in_loop(pred) + || is_Const(pred) + || is_SymConst(pred) + || get_node_info(node)->invariant ) ) { + get_node_info(node)->invariant = 1; + ++loop_info.invariant_loads; + } else + { + get_node_info(node)->invariant = 0; + } + } else { + int i; + invar = 1; + /* find loop variant preds */ + for(i = 0; i < arity; ++i) { + ir_node *pred = get_irn_n(node, i); + + if ( is_in_loop(pred) /* outside loop is loop invariant */ + && !is_Const(pred) /* constants */ + && !is_SymConst(pred) /* SymConst */ + && !get_node_info(node)->invariant ) { /* pred is marked as invariant */ + invar = 0; + } + } + + if (invar) { + get_node_info(node)->invariant = 1; + } else { + get_node_info(node)->invariant = 0; } } + return 0; } -///** -// * Finds invariant nodes and marks them as invariant. -// * (Post walk) -// */ -//unsigned get_invariants(ir_node *node, void *env) -//{ -// unsigned invar = 1; -// (void) env; -// -// if (is_Store(node)) -// { -// has_sto = 1; -// /* RETURN and abort walker */ -// return 1; -// } -// -// int arity = get_irn_arity(node); -// -// /* RETURN, no preds to visit */ -// if (arity == 0) return 0; -// -// if (is_Load(node)) -// { -// assert(arity>=2 && "expected load to have edge nr 1 (address)"); -// -// ir_node *pred = get_irn_n(node, 1); -// if (!is_in_loop(pred) /* Everything outside the loop is considered invariant */ -// || is_Const(pred) /* This is not true, but we also want the quasi-invariants. */ -// || is_SymConst(pred) -// || get_lstate(node)->invariant) -// { -// //printf("## CONSTLOAD: %ld \n", node->node_nr); -// get_lstate(node)->invariant = 1; -// } else -// { -// get_lstate(node)->invariant = 0; -// } -// } -// else -// { -// int i; -// invar = 1; -// /* find loop variant preds */ -// for(i = 0; i < arity; ++i) -// { -// ir_node *pred = get_irn_n(node, i); -// -// if ( !(!is_in_loop(pred) /* outside loop is loop invariant */ -// || is_Const(pred) /* constants */ -// || is_SymConst(pred) /* SymConst, if no Store */ -// || get_lstate(node)->invariant /* pred is marked as invariant */ -// ) ) -// { -// invar = 0; -// } -// } -// -// if (invar) { -// printf("const: %ld \n", node->node_nr); -// get_lstate(node)->invariant = 1; -// } else { -// get_lstate(node)->invariant = 0; -// } -//// DBG -//// if (!is_nodes_block_marked(pred)) { -//// //printf("pred outloop: %ld, pred %ld (const)\n", node->node_nr, pred->node_nr); -//// } else if (is_Const(pred) || is_SymConst(pred)) // || is_Phi(pred)) { -//// //printf("predconst: %ld, pred %ld CONST\n", node->node_nr, pred->node_nr); -//// } else if (pred->link == MARKED_CONST) { -//// //printf("predmarked: %ld, pred %ld const\n", node->node_nr, pred->node_nr); -//// } else { -//// mark=0; -//// } -// } -// return 0; -//} - -////TODO DBG Remove -//void phifix(ir_node *node, ir_node *newpred) -//{ -// ir_node *phi=get_Block_phis(node); -// while(phi) -// { -// int pa = get_irn_arity(phi); -// int ba = get_irn_arity(node); -// -// -// -// while(ba>pa) -// { -// printf("!!!!!!!!!! block has %d, phi had %d\n", ba, pa ); -// add_pred(phi, newpred); -// pa++; -// printf("!!!!!!!!!! block has %d, phi has now %d\n", ba, pa ); -// } -// phi=get_Phi_next(phi); -// } -//} static ir_node *ssa_second_def; static ir_node *ssa_second_def_block; /** - * + * Walks the graph bottom up, searching for definitions and create phis. + * (Does not handle the special case where the second definition is in the block of the user + * of the original definition because it is not necessary here.) */ -static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, - int first) +static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode) { int i; int n_cfgpreds; @@ -535,22 +429,22 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, ir_node *phi; ir_node **in; - /* This is needed because we create bads sometimes */ - if (is_Bad(block)) + DB((dbg, LEVEL_5, "ssa sdacp: block %ld\n", get_irn_node_nr(block))); + + /* Prevents creation of phi that would be bad anyway. + * Dead and bad blocks. */ + if (get_irn_arity(block) < 1 || is_Bad(block)) return new_Bad(); - /* the other defs can't be marked for cases where a user of the original - * value is in the same block as the alternative definition. - * In this case we mustn't use the alternative definition. - * So we keep a flag that indicated wether we walked at least 1 block - * away and may use the alternative definition */ - if (block == ssa_second_def_block && !first) { + if (block == ssa_second_def_block) { + DB((dbg, LEVEL_5, "ssa found second definition: use second def %ld\n", get_irn_node_nr(ssa_second_def))); return ssa_second_def; } /* already processed this block? */ if (irn_visited(block)) { - ir_node *value = get_lstate(block)->link; + ir_node *value = get_node_info(block)->link; + DB((dbg, LEVEL_5, "ssa already visited: use linked %ld\n", get_irn_node_nr(value))); return value; } @@ -561,11 +455,14 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, n_cfgpreds = get_Block_n_cfgpreds(block); if (n_cfgpreds == 1) { ir_node *pred_block = get_Block_cfgpred_block(block, 0); - ir_node *value = search_def_and_create_phis(pred_block, mode, 0); + ir_node *value; - get_lstate(block)->link = value; - //set_irn_link(block, value); + DB((dbg, LEVEL_5, "ssa 1 pred: walk pred %ld\n", get_irn_node_nr(pred_block))); + + value = search_def_and_create_phis(pred_block, mode); + get_node_info(block)->link = value; mark_irn_visited(block); + return value; } @@ -575,17 +472,25 @@ static ir_node *search_def_and_create_phis(ir_node *block, ir_mode *mode, in[i] = new_Unknown(mode); phi = new_r_Phi(block, n_cfgpreds, in, mode); - //set_irn_link(block, phi); - get_lstate(block)->link = phi; + + /* Important: always keep block phi list up to date. */ + add_Block_phi(block, phi); + /* EVERY node is assumed to have a node_info linked. */ + alloc_node_info(phi, NULL); + + DB((dbg, LEVEL_5, "ssa phi creation: link new phi %ld to block %ld\n", get_irn_node_nr(phi), get_irn_node_nr(block))); + + get_node_info(block)->link = phi; mark_irn_visited(block); /* set Phi predecessors */ for(i = 0; i < n_cfgpreds; ++i) { ir_node *pred_block = get_Block_cfgpred_block(block, i); - ir_node *pred_val = search_def_and_create_phis(pred_block, mode, 0); - + ir_node *pred_val = search_def_and_create_phis(pred_block, mode); + DB((dbg, LEVEL_5, "ssa phi pred:phi %ld, pred %ld\n", get_irn_node_nr(phi), get_irn_node_nr(pred_val))); set_irn_n(phi, i, pred_val); } + return phi; } @@ -602,16 +507,20 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, const ir_edge_t *edge; const ir_edge_t *next; + assert(orig_block && orig_val && second_block && second_val && + "no parameter of construct_ssa may be NULL"); + /* no need to do anything */ if (orig_val == second_val) return; irg = get_irn_irg(orig_val); + + ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED); inc_irg_visited(irg); mode = get_irn_mode(orig_val); - get_lstate(orig_block)->link = orig_val; - //set_irn_link(orig_block, orig_val); + get_node_info(orig_block)->link = orig_val; mark_irn_visited(orig_block); ssa_second_def_block = second_block; @@ -628,605 +537,823 @@ static void construct_ssa(ir_node *orig_block, ir_node *orig_val, if (is_End(user)) continue; - //DB((dbg, LEVEL_3, ">>> Fixing user %+F (pred %d == %+F)\n", user, j, get_irn_n(user, j))); + DB((dbg, LEVEL_5, "original user %ld\n", get_irn_node_nr(user))); if (is_Phi(user)) { ir_node *pred_block = get_Block_cfgpred_block(user_block, j); - newval = search_def_and_create_phis(pred_block, mode, 1); + newval = search_def_and_create_phis(pred_block, mode); } else { - newval = search_def_and_create_phis(user_block, mode, 1); + newval = search_def_and_create_phis(user_block, mode); } - /* don't fix newly created Phis from the SSA construction */ - if (newval != user) { - //DB((dbg, LEVEL_4, ">>>> Setting input %d of %+F to %+F\n", j, user, newval)); + /* If we get a bad node the user keeps the original in. No second definition needed. */ + if (newval != user && !is_Bad(newval)) set_irn_n(user, j, newval); - } } + + ir_free_resources(irg, IR_RESOURCE_IRN_VISITED); } +/* get the number of backedges without alien bes */ +static int get_backedge_n(ir_node *loophead, unsigned with_alien) +{ + int i; + int be_n = 0; + int arity = get_irn_arity(loophead); + for (i = 0; i < arity; ++i) { + ir_node *pred = get_irn_n(loophead, i); + if (is_backedge(loophead, i) && ( with_alien || is_in_loop(pred)) ) + ++be_n; + } + return be_n; +} + +/** + * Sets the nodes backedges, according to its predecessors link. + */ +static void fix_backedge_info(ir_node *node) +{ + int i; + for (i = 0; i < get_irn_arity(node); ++i) + { + ir_node *pred = get_irn_n(node, i); + if (get_node_info(pred)->link != NULL) + set_backedge(node, i); + else + set_not_backedge(node, i); + } +} +/** + * + * ============= PEELING ===================================== + * + */ /** - * Rewires the heads after peeling. This results in a tail-controlled loop. + * Rewires the heads after peeling. */ -void fix_head(ir_node *loophead) +static void peel_fix_heads(void) { + ir_node **loopheadnins, **peelheadnins; + ir_node *loophead = loop_cf_head; + ir_node *peelhead = get_copy(loophead); + int headarity = get_irn_arity(loophead); - int i; - ir_node **loopheadnins; - ir_node **peelheadnins; ir_node *phi; - ir_node *peelhead = get_copy(loophead); + int i; + int lheadin_c = 0; int pheadin_c = 0; - /** - * the loopheads new preds are: - * its own backedge(s) and the former backedge(s) of the peeled code - */ - int lhead_arity = 2 * backedges_n; //ARR_LEN(backedges); - int phead_arity = headarity - backedges_n; //ARR_LEN(backedges); + int backedges_n = get_backedge_n(loophead, 0); - /** We assume the worst case, in which every head entry - * origins from the same node. +1 for a null terminated list. - */ - //int tchead_arity = ARR_LEN(head_entries) + ( headarity - backedges_n) + 1 ; + int lhead_arity = 2 * backedges_n; + int phead_arity = headarity - backedges_n; + /* new in arrays */ NEW_ARR_A(ir_node *, loopheadnins, lhead_arity ); NEW_ARR_A(ir_node *, peelheadnins, phead_arity ); - phi = get_Block_phis(loophead); - while(phi) { - NEW_ARR_A(ir_node *, get_lstate(phi)->ins, lhead_arity); - phi=get_Phi_next(phi); + for_each_phi(loophead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity); } - - phi = get_Block_phis(peelhead); - while(phi) - { - NEW_ARR_A(ir_node *, get_lstate(phi)->ins, phead_arity); - phi=get_Phi_next(phi); + for_each_phi(peelhead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, phead_arity); } for (i = 0; i < headarity; i++) { - ir_node *phi; ir_node *orgjmp = get_irn_n(loophead, i); ir_node *copyjmp = get_copy(orgjmp); /** * Rewire the head blocks ins and their phi ins. - * Requires blocks phi list. - * - * 1. Alien bes origin from the peeled head (new head of the whole loop) - * 2. Loops own bes must be kept/copied to the loophead. - * 3. All other edges origin from the peeled head (new head of the loop) + * Requires phi list per block. */ - - - //printf("head i %d\n", i); - - if (is_backedge(loophead, i)) - { - if (is_alien_edge(loophead, i)) { - peelheadnins[pheadin_c] = orgjmp; /* alien bes go to the peeled head */ - //set_backedge(peelhead, pheadin_c); - - // alien bes origin at the peeled head - for_each_phi(peelhead, phi) - { - //printf("alienbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr); - get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i); - } - //printf("alienbe %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr); - ++pheadin_c; - } else { - loopheadnins[lheadin_c] = orgjmp; /* keep/copy the loops own bes */ - //set_backedge(loophead, lheadin_c); - - for_each_phi(loophead, phi) { - //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n(phi, i)->node_nr); - get_lstate( phi )->ins[lheadin_c] = get_irn_n(phi, i); - } - //printf("normalbe %ld @ %d -> add to loophead orgjump %ld\n", loophead->node_nr, i, orgjmp->node_nr); - ++lheadin_c; - - loopheadnins[lheadin_c] = copyjmp; /* former bes of the peeled code origin now from the loophead */ - //set_not_backedge(loophead, lheadin_c); - - /* get_irn_n( get_copy_of(phi), i) get_copy_of(get_irn_n( phi, i)) - * Order is crucial! Preds outside of the loop are non existent, like Const. - */ - for_each_phi(loophead, phi) { - //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( get_copy_of(phi), i)->node_nr); - get_lstate( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ; - } - //printf("normalbe %ld @ %d -> add to loophead copyjump %ld\n", loophead->node_nr, i, copyjmp->node_nr); - ++lheadin_c; + if (is_backedge(loophead, i) && !is_alien_edge(loophead, i)) { + loopheadnins[lheadin_c] = orgjmp; + /* marks out edge as backedge */ + get_node_info(orgjmp)->link = orgjmp; + for_each_phi(loophead, phi) { + get_node_info( phi )->ins[lheadin_c] = get_irn_n( phi, i) ; + } + ++lheadin_c; + + loopheadnins[lheadin_c] = copyjmp; /* former bes of the peeled code origin now from the loophead */ + /* marks out edge as normal edge */ + get_node_info(copyjmp)->link = NULL; + /* get_irn_n( get_copy_of(phi), i) get_copy_of(get_irn_n( phi, i)) + * Order is crucial! Predecessors outside of the loop are non existent. + * The copy (cloned with its ins!) has pred i, + * but phis pred i might not have a copy of itself. + */ + for_each_phi(loophead, phi) { + //printf("normalbe phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( get_copy_of(phi), i)->node_nr); + get_node_info( phi )->ins[lheadin_c] = get_irn_n( get_copy(phi), i) ; } + ++lheadin_c; } else { peelheadnins[pheadin_c] = orgjmp; - //set_not_backedge(peelhead, pheadin_c); - + /* marks out edge as normal edge */ + get_node_info(orgjmp)->link = NULL; for_each_phi(peelhead, phi) { - //printf("edge phi %ld @ %d -> %ld\n", phi->node_nr, i, get_irn_n( phi, i)->node_nr); - get_lstate( phi )->ins[pheadin_c] = get_irn_n(phi, i); + get_node_info( phi )->ins[pheadin_c] = get_irn_n(phi, i); } - //printf("edge %ld @ %d -> add to peelhead orgjump %ld\n", peelhead->node_nr, i, orgjmp->node_nr); ++pheadin_c; } }/* for */ -// printf("pheadin %d arr %d lheadin %d arr %d \n", -// pheadin_c, ARR_LEN(peelheadnins), -// lheadin_c, ARR_LEN(loopheadnins)); - + //DBG assert(pheadin_c == ARR_LEN(peelheadnins) && lheadin_c == ARR_LEN(loopheadnins) && - "the number of head elements does not match the predefined one"); + "the constructed head arities do not match the predefined arities"); + /** + * assign the ins to the nodes + */ set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins); set_irn_in(peelhead, ARR_LEN(peelheadnins), peelheadnins); + /* Fixes the backedge information according to the link. + * Following loop optimizations might depend on it. */ + fix_backedge_info(loophead); + fix_backedge_info(peelhead); + for_each_phi(loophead, phi) { - ir_node **ins = get_lstate( phi )->ins; + ir_node **ins = get_node_info( phi )->ins; set_irn_in(phi, lhead_arity, ins); } for_each_phi(peelhead, phi) { - ir_node **ins = get_lstate( phi )->ins; + ir_node **ins = get_node_info( phi )->ins; set_irn_in(phi, phead_arity, ins); } } -ir_node *rawcopy_node(ir_node *node) +/** + * Create a raw copy (ins are still the old ones) of the given node. + */ +static ir_node *rawcopy_node(ir_node *node) { ir_node *cp; - link_node_state_t *cpstate; + node_info *cpstate; cp = exact_copy(node); set_copy(node, cp); - cpstate = XMALLOCZ(link_node_state_t); - cp->link = cpstate; - if (is_Block(cp)) - cp->loop = NULL; /* the copy does not belong to the loop */ - set_irn_visited(cp, current_ir_graph->visited); + cpstate = new_node_info(); + set_irn_link(cp, cpstate); + mark_irn_visited(cp); return cp; } +//int temp = 0; +// +///* This walker copies all walked nodes. The walk_condition determines the nodes to walk. */ +//static void keepalives_walk(ir_node *node, walker_condition *walk_condition) +//{ +// int i; +// int arity; +// ir_graph *irg = current_ir_graph; +// +// /** +// * break condition and cycle resolver, creating temporary node copies +// */ +// if (get_irn_visited(node) >= get_irg_visited(irg)) { +// return; +// } +// +// /* Walk */ +// mark_irn_visited(node); +// +// if (!is_Block(node)) { +// ir_node *pred = get_nodes_block(node); +// if (walk_condition(pred)) +// keepalives_walk( pred, walk_condition ); +// } +// +// arity = get_irn_arity(node); +// +// for (i = get_irn_arity(node) - 1; i >= 0; --i) { +// ir_node *pred = get_irn_n(node, i); +// +// if (walk_condition(pred)) +// keepalives_walk( pred, walk_condition ); +// } +// +// add_End_keepalive(get_irg_end(current_ir_graph), node); +//} + + /** - * Peels the loop by copying the contents. Graph needs some rewiring after that. + * This walker copies all walked nodes. + * If the walk_condition is true for a node, it is walked. + * All nodes node_info->copy attributes has to be NULL prior to every to every walk. */ -void peel_walk(ir_node *node) +static void copy_walk(ir_node *node, walker_condition *walk_condition) { int i; int arity; ir_node *cp; ir_node **cpin; ir_graph *irg = current_ir_graph; - - //(void) env; - - link_node_state_t *nodestate = get_lstate(node); + node_info *node_info = get_node_info(node); /** * break condition and cycle resolver, creating temporary node copies */ - if (node->visited >= irg->visited) - { - if (!nodestate->cloned && !nodestate->temp) - { - /** temporary clone this node - * because we were here before and would walk into a cycle - */ - rawcopy_node(node); - nodestate->temp=1; + if (get_irn_visited(node) >= get_irg_visited(irg)) { + /* Here we rely on nodestate's copy being initialized with NULL */ + DB((dbg, LEVEL_5, "copy_walk: We have already visited %ld\n", get_irn_node_nr(node))); + if (node_info->copy == NULL) { + if (!is_Const(node) && !is_SymConst(node)) { + cp = rawcopy_node(node); + } else { + cp = node; + node_info->copy = cp; + } + DB((dbg, LEVEL_5, "The TEMP copy of %ld is created %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp))); } return; } - //printf(" ----- WALK %ld ----- \n", node->node_nr); - /** - * WALK - */ - set_irn_visited(node, irg->visited); +// add_End_keepalive(get_irg_end(current_ir_graph), node); - if ( !is_Block(node) ) { - ir_node *pred = get_irn_n(node, -1); - if (is_in_loop(pred)) - peel_walk(pred); + /* Walk */ + mark_irn_visited(node); + + if (!is_Block(node)) { + ir_node *pred = get_nodes_block(node); + if (walk_condition(pred)) + DB((dbg, LEVEL_5, "walk block %ld\n", get_irn_node_nr(pred))); + copy_walk( pred, walk_condition ); } arity = get_irn_arity(node); NEW_ARR_A(ir_node *, cpin, arity); - for (i = get_irn_arity(node) - 1; i >= 0; --i) { ir_node *pred = get_irn_n(node, i); - /* collect head entries */ - if ( is_in_head(pred) && !is_in_head(node) ) - { - loop_entry_t entry; - entry.node = node; - entry.pred_irn_n = i; - ARR_APP1(loop_entry_t, head_entries, entry); - } - - if (is_in_loop(pred)) - { - peel_walk(pred); - cpin[i] = get_copy(pred); //get_lstate(pred)->link; - //printf("copy of %ld gets in %ld", node->node_nr, cpin[i]->node_nr); + if (walk_condition(pred)) { + DB((dbg, LEVEL_5, "walk node %ld\n", get_irn_node_nr(pred))); + copy_walk( pred, walk_condition ); + cpin[i] = get_copy(pred); + DB((dbg, LEVEL_5, "copy of %ld gets new in %ld which is copy of %ld\n", + get_irn_node_nr(node), get_irn_node_nr(get_copy(pred)), get_irn_node_nr(pred))); } else { cpin[i] = pred; } - //printf("copy of %ld gets in %ld \n", node->node_nr, cpin[i]->node_nr); } - /** - * copy node / finalize temp node - */ - if (!nodestate->temp) { -// if (!is_Const(node) && !is_SymConst(node)) { + /* copy node / finalize temp node */ + if (node_info->copy == NULL) { + /* No temporary copy existent */ + + /* Do not copy constants TODO right? */ + if (!is_Const(node) && !is_SymConst(node)) { cp = rawcopy_node(node); -// } else { -// cp = node; -// //DBG -// printf("CONST FINAL: %ld -F> %ld \n", node->node_nr, cp->node_nr); -// nodestate->link = cp; -// } + } else { + cp = node; + node_info->copy = cp; + } + DB((dbg, LEVEL_5, "The FINAL copy of %ld is CREATED %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp))); } else { /* temporary copy is existent but without correct ins */ - cp = get_copy(node); // nodestate->link; - //printf("FINALIZE: %ld \n", cp->node_nr); + cp = get_copy(node); + DB((dbg, LEVEL_5, "The FINAL copy of %ld is EXISTENT %ld\n", get_irn_node_nr(node), get_irn_node_nr(cp))); } - // special treatment for the head/condition: we need 3 heads for a tail-controlled and peeled loop - if (is_in_head(node)) { - // head/condition for the tail-controlled loop - // These copies are linked to the copies - rawcopy_node(cp); - } - - if (!is_Block(node)) - { + if (!is_Block(node)) { ir_node *cpblock = get_copy(get_nodes_block(node)); - /* set the block of the copy to the copied block */ - //printf(" PRE NODE %ld BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr); set_nodes_block(cp, cpblock ); - //printf(" POST NODE %ld BLOCK %ld \n", cp->node_nr, get_nodes_block(cp)->node_nr); - - /* fix the phi information in attr.phis (does not add the phi node to the block) */ + /* fix the phi information in attr.phis */ if( is_Phi(cp) ) - { add_Block_phi(cpblock, cp); - //printf("PHI-BLOCK block %ld got its phi %ld\n", cpblock->node_nr, cp->node_nr); - } - } - else { - /* macroblock info is not copied */ + } else { + /* macroblock info has not been copied */ set_Block_MacroBlock(cp, cp); } - //dbg valid ins? -// for(i=0; inode_nr, cp->node_nr, cpin[i]->node_nr); - set_irn_in(cp, ARR_LEN(cpin), cpin); - -// for(i=0; i< ARR_LEN(cpin); i++) -// { -// printf("ins %ld: %ld \n", cp->node_nr, cpin[i]->node_nr); -// } - -//TODO REM -// if (!nodestate->temp) -// { -// nodestate->link = cp; -// cpstate = XMALLOCZ(link_node_state_t); -// cp->link = cpstate; -// } else { -// /* temporary copy is existent but without correct ins */ -// cp = nodestate->link; -// } - - - nodestate->temp = 0; - nodestate->cloned = 1; } -//void chklink (ir_node *n, void * e) -//{ -// ir_node *link = n->link; -// link_node_state_t *l = (link_node_state_t *)link; -// -// printf("n %ld\n", n->node_nr); -// printf("l p %ld\n", l->link); -// if (l->link) -// printf("l %ld\n", l->link->node_nr); -// -//} - -/** - * Loop peeling, and fix the cf for the loop entry nodes, which have now more preds - */ -void peel(void) +/* Loop peeling, and fix the cf for the loop entry nodes, which have now more preds */ +static void peel(out_edges *loop_outs) { int i; ir_node **entry_buffer; int entry_c = 0; - int entry_i; - NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_entries)); + ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); - for(i = 0; i < ARR_LEN(loop_entries); i++) - { - loop_entry_t entry = loop_entries[i]; + NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(loop_outs)); + + /* duplicate loop walk */ +// cur_head = loop_cf_head; + inc_irg_visited(current_ir_graph); + + for(i = 0; i < ARR_LEN(loop_outs); i++) { + out_edges entry = loop_outs[i]; ir_node *node = entry.node; ir_node *pred = get_irn_n(entry.node, entry.pred_irn_n); if (is_Block(node)) { - /* node is block and the given pred points inside the loop */ - ir_node *cppred; + copy_walk( pred, is_in_loop ); + duplicate_preds(node, entry.pred_irn_n, get_copy(pred) ); + } else { + copy_walk( pred, is_in_loop ); + if (!is_End(node)) /* leave out keepalives */ + /* Node is user of a value defined inside the loop. + * We'll need a phi since we duplicated the loop. */ + /* cannot construct_ssa here, because it needs another walker */ + entry_buffer[entry_c++] = pred; + } + } - peel_walk( pred ); + ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_VISITED); - // leave keepalives out - if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) { - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); - } else { - cppred = get_copy(pred); - //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr); - add_pred( node, cppred ); - //printf("fix block entry %ld to cp %ld\n", node->node_nr, cppred->node_nr); - } + /* Rewires the 2 heads */ + peel_fix_heads(); - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); + /* Generate phis for values from peeled code and original loop */ + for(i = 0; i < entry_c; i++) + { + ir_node *cppred, *block, *cpblock, *pred; - //DBG - //phifix(node, cppred); - } else { - /* node is somewhere in the graph, outside of the loop */ - //ir_node *cppred; - //ir_node *block; - //ir_node *cpblock; - peel_walk( pred ); - - // no ssa for keepalives - if (is_End(node) && (is_Block(pred) || is_Phi(pred)) ) { - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); - } else { - //printf("fix entry %ld to %ld\n", node->node_nr, pred->node_nr); - entry_buffer[entry_c++] = pred; - } + /* It is not possible to use + * pred = get_irn_n(entry.node, entry.pred_irn_n); + * because we might have changed the nodes predecessors in construct_ssa + */ + pred = entry_buffer[i]; + cppred = get_copy(pred); + block = get_nodes_block(pred); + cpblock = get_nodes_block(cppred); + construct_ssa(block, pred, cpblock, cppred); + } +} + +/* + * Populates head_entries with (node, pred_pos) tuple + * whereas the node's pred at pred_pos is in the head but not the node itself. + * Head and condition chain blocks must be marked. + */ +static void get_head_entries(ir_node *node, void *env) +{ + int i; + int arity = get_irn_arity(node); + (void) env; + + for(i = 0; i < arity; ++i) { + /* node is not in the head, but the predecessor is. + * (head or loop chain nodes are marked) */ + if (!is_nodesblock_marked(node) && is_nodesblock_marked(get_irn_n(node, i))) { + out_edges entry; + entry.node = node; + entry.pred_irn_n = i; + DB((dbg, LEVEL_5, + "Found head chain entry %ld @%d because !inloop %ld and inloop %ld\n", + node->node_nr, i, node->node_nr, get_irn_n(node, i)->node_nr)); + ARR_APP1(out_edges, cur_head_outs, entry); + } + } +} - //add_End_keepalive(get_irg_end(current_ir_graph), get_copy_of(pred) ); +/** + * Find condition chains, and add them to be inverted, until the node count exceeds the limit. + * A block belongs to the chain if a condition branches out of the loop. + * Returns if the given block belongs to the condition chain. + * FIXME prevent collecting ALL loop blocks (may happen if all blocks jump out of the loop) + */ +static unsigned condition_chains(ir_node *block) { + const ir_edge_t *edge; + unsigned mark = 0; + //unsigned over_limit = 0; - // cannot construct_ssa here, because it needs another walker + int nodes_n = 0; - } /* is block */ - } /* for */ + /* we need all outs, including keeps (TODO firm function for that??) */ + foreach_out_edge_kind(block, edge, EDGE_KIND_NORMAL) { + ++nodes_n; + } - //irg_walk_graph(current_ir_graph, chklink, NULL, NULL); + /* We do not want to collect more nodes from condition chains, than the limit allows us to. */ + if (head_inversion_node_count + nodes_n > head_inversion_node_limit) { + //over_limit = 1; + set_Block_mark(block, 0); +// printf(" %ld over limit\n", block->node_nr); + return 0; + } - fix_head(loop_cf_head); + ++loop_info.blocks; + + /* First: check our successors, and add all succs that are outside of the loop to the list */ + foreach_block_succ(block, edge) { + ir_node *src = get_edge_src_irn( edge ); + int pos = get_edge_src_pos( edge ); + + if (!is_in_loop(src)) { + //printf(" src %ld @ %d into block %ld \n", src->node_nr, pos, block->node_nr); + mark = 1; + out_edges entry; + entry.node = src; + entry.pred_irn_n = pos; + ARR_APP1(out_edges, cond_chain_entries, entry); + mark_irn_visited(src); + } + } - //printf (" FIXHEAD DONE :D \n"); + /* this block is not part of the chain, + * because the chain would become too big or we have no succ outside of the loop */ + if (mark == 0) { + set_Block_mark(block, 0); + return 0; + } else { + set_Block_mark(block, 1); + DB((dbg, LEVEL_5, "block %ld is part of condition chain\n", get_irn_node_nr(block))); + head_inversion_node_count += nodes_n; + } - entry_i = 0; + /* Second: walk all successors, and add them to the list if they are not part of the chain */ + foreach_block_succ(block, edge) { + unsigned inchain; + ir_node *src = get_edge_src_irn( edge ); + int pos = get_edge_src_pos( edge ); - /* Generate phis for values from peeled code and original loop */ - for(i = 0; entry_i < entry_c; i++) - { - loop_entry_t entry = loop_entries[i]; - ir_node *node = entry.node; + /* already done cases */ + if (!is_in_loop( src ) || (get_irn_visited(src) >= get_irg_visited(current_ir_graph))) + continue; - if (is_Block(node)) - { - /* block */ - ir_node *phi=get_Block_phis(node); + mark_irn_visited(src); - while(phi) - { - add_pred(phi, entry_buffer[entry_i++]); - phi=get_Phi_next(phi); - } - } else { - /* not block */ + inchain = condition_chains( src ); - ir_node *cppred, *block, *cpblock, *pred; + /* if successor is not part of chain we need to collect its outs */ + if ( !inchain ) { + out_edges entry; + entry.node = src; + entry.pred_irn_n = pos; + ARR_APP1(out_edges, cond_chain_entries, entry); + } + } + return mark; +} - /** - * pred = get_irn_n(entry.node, entry.pred_irn_n); - * does not work, because we could have changed the nodes preds in construct_ssa - */ +/** + * + */ +static void inversion_fix_heads(void) +{ + ir_node **loopheadnins, **invheadnins; + ir_node *loophead = loop_cf_head; + ir_node *invhead = get_copy(loophead); - pred = entry_buffer[entry_i++]; + int headarity = get_irn_arity(loophead); + ir_node *phi; + int i; - //printf("pred %ld\n", pred->node_nr); - cppred = get_copy(pred); - //printf("cppred %ld\n", cppred->node_nr); - block = get_nodes_block(pred); - //printf("block %ld\n", block->node_nr); - cpblock = get_nodes_block(cppred); - //printf("cpblock %ld\n", cpblock->node_nr); + int lheadin_c = 0; + int iheadin_c = 0; + int backedges_n = get_backedge_n(loophead, 0); + int lhead_arity = headarity - backedges_n; + int ihead_arity = backedges_n; - //dump_ir_block_graph(current_ir_graph, "vorher"); - construct_ssa(block, pred, cpblock, cppred); - //add_End_keepalive(get_irg_end(current_ir_graph), cppred); + /* new in arrays for all phis in the head blocks */ + NEW_ARR_A(ir_node *, loopheadnins, lhead_arity); + NEW_ARR_A(ir_node *, invheadnins, ihead_arity); + for_each_phi(loophead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, lhead_arity); + } + for_each_phi(invhead, phi) { + NEW_ARR_A(ir_node *, get_node_info(phi)->ins, ihead_arity); + } - //add_pred(get_irg_end(current_ir_graph), cppred); - //dump_ir_block_graph(current_ir_graph, "nachher"); + for (i = 0; i < headarity; i++) { + ir_node *pred = get_irn_n(loophead, i); + /** + * Rewire the head blocks ins and their phi ins. + * Requires phi list per block. + */ + if ( is_backedge(loophead, i) ) { + invheadnins[iheadin_c] = pred; + for_each_phi(invhead, phi) { + get_node_info( phi )->ins[iheadin_c] = get_irn_n( phi, i) ; + } + ++iheadin_c; + } else { + /* just copy these edges */ + loopheadnins[lheadin_c] = pred; + for_each_phi(loophead, phi) { + get_node_info( phi )->ins[lheadin_c] = get_irn_n(phi, i); + } + ++lheadin_c; } + }/* for */ + + /* assign the ins to the head blocks */ + set_irn_in(loophead, ARR_LEN(loopheadnins), loopheadnins); + set_irn_in(invhead, ARR_LEN(invheadnins), invheadnins); + + /* assign the ins for the phis */ + for_each_phi(loophead, phi) { + ir_node **ins = get_node_info(phi)->ins; + set_irn_in(phi, lhead_arity, ins); } -} -void alloc_linkstructs(ir_node *node, void *env) -{ - link_node_state_t *state = XMALLOCZ(link_node_state_t); - (void) env; - node->link = (void *)state; + for_each_phi(invhead, phi) { + ir_node **ins = get_node_info(phi)->ins; + set_irn_in(phi, ihead_arity, ins); + } } -void free_linkstructs(ir_node *node, void *env) -{ - (void) env; - xfree( (link_node_state_t*) node->link); -} -void decision_maker(void) +static void loop_inversion_walk(out_edges *head_entries) { - //inc_irg_visited(current_ir_graph); - //loop_walker( loop_entries, NULL, get_invariants, NULL ); + int i; + ir_node *phi; + int entry_c = 0; + ir_node **entry_buffer; + ir_node **head_phi_assign; + NEW_ARR_A(ir_node *, entry_buffer, ARR_LEN(head_entries)); - inc_irg_visited(current_ir_graph); - irg_walk_graph(current_ir_graph, alloc_linkstructs, NULL, NULL); + head_phi_assign = NEW_ARR_F(ir_node *, 0); - inc_irg_visited(current_ir_graph); - peel(); + /* Find assignments in the condition chain, to construct_ssa for them after the loop inversion. */ + for_each_phi( loop_cf_head , phi) { + for(i=0; i max_loop_opnodes) + return; - // TODO free all link states... or better put them on functionstack +// foreach_out_edge(loop_cf_head, edge) { +// ir_node *node = get_edge_src_irn(edge); +// if ( !is_Block(node) && !is_Proj(node) && !is_Phi(node) ) +// ++loop_info.opnodes_head; +// } + + inc_irg_visited(current_ir_graph); + loop_walker( loop_outs, NULL, get_invariants, NULL ); + + /* This could be improved with knowledge about variable range. */ + if (loop_info.stores == 0 && loop_info.invariant_loads > 0) + do_peel = 1; + +#endif + + + do_peel = 1; + do_inversion = 1; + + /* Loop peeling */ + if (do_peel) { + peel(cur_loop_outs); + reset_node_infos(); + } + + DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-peeled1")); + + DEL_ARR_F(cur_loop_outs); + + /* Loop inversion */ + /* Search for condition chains. We may not do this before peeling, as peeling changes things. */ + ir_reserve_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); + irg_walk_graph(current_ir_graph, unmark_block, NULL, NULL); + + cond_chain_entries = NEW_ARR_F(out_edges, 0); + head_inversion_node_count = 0; + inc_irg_visited(current_ir_graph); + set_Block_mark(loop_cf_head, 1); + mark_irn_visited(loop_cf_head); + /* find condition chains */ + condition_chains(loop_cf_head); + + // TODO assume number of phis to be created. prevent inversion... + + /* Loop inversion */ + if (loop_info.blocks < 2) { + do_inversion = 0; + DB((dbg, LEVEL_2, "Loop contains %d (less than 2) blocks => No Inversion done.\n", loop_info.blocks)); + } + + if (do_inversion) { + cur_head_outs = NEW_ARR_F(out_edges, 0); + + /* get all edges pointing into the head or condition chain */ + irg_walk_graph(current_ir_graph, get_head_entries, NULL, NULL); + + loop_inversion_walk(cur_head_outs); + + DEL_ARR_F(cur_head_outs); + } + + DEBUG_ONLY(dump_ir_block_graph(current_ir_graph, "-inversed2")); /* FREE */ - DEL_ARR_F(loop_entries); - DEL_ARR_F(head_entries); - //DEL_ARR_F(backedges); - //DEL_ARR_F(alien_backedges); - //DEL_ARR_F(head_edges); + DEL_ARR_F(cond_chain_entries); + ir_free_resources(current_ir_graph, IR_RESOURCE_BLOCK_MARK); +} + +/* */ +static void analyze_loop(ir_loop *loop) +{ + /* Init new for every loop */ + cur_loop = loop; - //dump_ir_block_graph(current_ir_graph, "-lu1"); + loop_cf_head = NULL; + loop_cf_head_valid = 1; + loop_inv_head = NULL; + loop_peeled_head = NULL; + + loop_info.calls = 0; + loop_info.invariant_loads = 0; + loop_info.loads = 0; + loop_info.stores = 0; + loop_info.opnodes_n = 0; + loop_info.blocks = 0; + + DB((dbg, LEVEL_1, " >>>> current loop includes node %ld <<<\n", get_irn_node_nr(get_loop_node(loop, 0)))); + + decision_maker(); + + DB((dbg, LEVEL_1, " <<<< end of loop with node %ld >>>>\n", get_irn_node_nr(get_loop_node(loop, 0)))); } -/** - * Find most inner loops and send them to analyze_loop - */ -void analyze_inner_loop(ir_loop *loop) +/* Find most inner loops and send them to analyze_loop */ +static void analyze_inner_loop(ir_loop *loop) { /* descend into sons */ int sons = get_loop_n_sons(loop); - //printf("found %d loops \n", sons); - - if (sons==0) - { - //printf("analyze loop %ld\n", loop->loop_nr); + if (sons==0) { analyze_loop(loop); - } - else - { + } else { int s; - for(s=0; sloop_nr); + for(s=0; s>> loop optimization (Startnode %ld) <<<\n", get_irn_node_nr(get_irg_start(irg)))); + + /* Init */ + link_node_state_list = NULL; + + /* preconditions */ + ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST); + collect_phiprojs(irg); + ir_free_resources(irg, IR_RESOURCE_IRN_LINK); + + set_current_ir_graph(irg); assure_cf_loop(irg); + /* allocate node_info for additional information on nodes */ + ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); + irg_walk_graph(current_ir_graph, alloc_node_info, NULL, NULL); + loop = get_irg_loop(irg); sons = get_loop_n_sons(loop); - //printf("FOUND %d LOOPS \n", sons); - for (nr=0; nrpass, name ? name : "loop_unroll", -// loop_unroll_wrapper); -//} + DB((dbg, LEVEL_1, " >>> loop optimization done (Startnode %ld)<<<\n", get_irn_node_nr(get_irg_start(irg)))); +} -/* -void firm_init_loopunroll(void) { - FIRM_DBG_REGISTER(dbg, "firm.opt.loopunroll"); -}*/ +void firm_init_loop(void) { + FIRM_DBG_REGISTER(dbg, "firm.opt.loop"); +}