3 * @file loop_unrolling.c
6 * File name: ir/opt/loop_unrolling.c
7 * Purpose: Make loop unrolling.
8 * Author: Beyhan Veliev
12 * Copyright: (c) 2004 Universität Karlsruhe
13 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
17 # include "loop_unrolling.h"
22 # include "irloop_t.h"
23 # include "irgopt_t.h"
24 # include "irnode_t.h"
28 # include "strength_red.h"
32 /*We will know if the head of the loop to be copy.
33 * Default don't copy.*/
34 static int copy_loop_head = 0;
37 ir_node *irn ; /* Node of the loop to be unrolling*/
38 ir_node *copy[MAX_UNROLL] ; /* The copy of the node */
42 * compare two elements of the copies_t set
44 static int set_cmp(const void *elt, const void *key, size_t size)
46 const copies_t *c1 = elt;
47 const copies_t *c2 = key;
49 return c1->irn != c2->irn;
52 static INLINE int * new_backedge_arr(struct obstack *obst, int size)
54 int *res = NEW_ARR_D (int, obst, size);
55 memset(res, 0, sizeof(int) * size);
59 static INLINE void new_backedge_info(ir_node *n) {
60 switch(get_irn_opcode(n)) {
62 n->attr.block.cg_backedge = NULL;
63 n->attr.block.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
66 n->attr.phi_backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
69 n->attr.filter.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
76 * Remember the new node in the old node by using a field all nodes have.
80 set_new_node (ir_node *old, ir_node *new)
86 * Copies the node to the new obstack. The Ins of the new node point to
87 * the predecessors on the old obstack. For block/phi nodes not all
88 * predecessors might be copied. n->link points to the new node.
89 * For Phi and Block nodes the function allocates in-arrays with an arity
90 * only for useful predecessors. The arity is determined by counting
91 * the non-bad predecessors of the block.
93 * @param n The node to be copied
94 * @param env if non-NULL, the node number attribute will be copied to the new node
98 copy_node (ir_node *n, void *env)
102 opcode op = get_irn_opcode(n);
103 int copy_node_nr = env != NULL;
105 /* The end node looses it's flexible in array. This doesn't matter,
106 as dead node elimination builds End by hand, inlineing doesn't use
108 /* assert(n->op == op_End || ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); */
111 /* node copied already */
114 new_arity = get_irn_arity(n);
116 nn = new_ir_node(get_irn_dbg_info(n),
125 /* Copy the attributes. These might point to additional data. If this
126 was allocated on the old obstack the pointers now are dangling. This
127 frees e.g. the memory of the graph_arr allocated in new_immBlock. */
128 copy_node_attr(n, nn);
129 new_backedge_info(nn);
134 /* for easier debugging, we want to copy the node numbers too */
135 nn->node_nr = n->node_nr;
141 static int is_Phi_in_loop_head(ir_node *phi, ir_node *loop_head) {
142 assert(is_Block(loop_head));
143 return is_Phi(phi) && (get_nodes_block(phi) == loop_head);
147 * Copies predecessors of node to copies of node.
148 * If the predecessor is a loop invariant, then the copy get it
149 * as predecessor, else the copy of the predecessor.
151 * @param *l_n A set, where the node of the loop are saved.
152 * @param *value A element of the set.
153 * @param *info Contains information about the induction variable.
154 * @param *unroll_factor A integer power of 2.
155 * @param *env Free environment pointer.
158 set_preds (set *l_n, copies_t *value, induct_var_info *info, int unroll_factor, void *env)
161 copies_t *value_pred;
163 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
165 irn_arity = get_irn_arity(value->irn);
167 for (i = 0; i < irn_arity; i++) {
168 ir_node *pred = get_irn_n(value->irn, i);
173 value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(pred));
175 if (value->irn != loop_head && !is_Phi_in_loop_head(value->irn, loop_head)) {
176 if (value_pred == NULL) {
177 /* Is loop invariant. */
178 for(int p = 0; p < unroll_factor -1; p++)
179 set_irn_n (value->copy[p], i, pred);
182 for(int p = 0; p < unroll_factor -1; p++)
183 set_irn_n (value->copy[p], i, value_pred->copy[p]);
188 /* Set the backedge of phi in the loop head.The backedge of phis in the loop head
189 * must now point to the value defined
190 * in the last copie of the loop body.
192 * @param *l_n A set, where the node of the loop are saved.
193 * @param *value A phi in the loop head.
194 * @param *info Contains information about the induction variable.
195 * @param *unroll_factor A integer power of 2.
198 set_phi_backedge(set *l_n, copies_t *value, induct_var_info *info, int unroll_factor)
200 copies_t key, *value_pred;
201 key.irn = get_irn_n(value->irn, info->op_pred_pos);
202 value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
204 set_Phi_pred(value->irn, info->op_pred_pos, value_pred->copy[unroll_factor - 2]);
207 /** Test for a loop head.
209 * Returns true if the node has predecessors in the loop _and_ out of
210 * the loop. Then it is a loop head: The loop can be entered through
213 * @param *n The node to be tested.
214 * @param *info Contains the loop information.
217 is_loop_head(ir_node *n, induct_var_info *info)
220 int some_outof_loop = 0, some_in_loop = 0;
222 assert(get_irn_op(n) == op_Block);
223 arity = get_Block_n_cfgpreds(n);
225 for (i = 0; i < arity; i++) {
226 ir_node *pred = get_Block_cfgpred(n, i);
228 if (is_loop_invariant(pred, get_loop_node(info->l_itervar_phi, 0))) {
234 return some_outof_loop && some_in_loop;
238 /** Test wether the passed loop is a natural loop.
240 * Returns true if the loop has only one loop header and only a single
243 * @param *info Contains the loop information.
246 is_natural_loop ( induct_var_info *info)
250 l_n_node = get_loop_n_nodes (info->l_itervar_phi);
252 for (int i = 1; i < (l_n_node); i ++) {
253 l_node = get_loop_node (info->l_itervar_phi, i);
254 if (is_loop_head(l_node, info)) return 0;
256 if (has_backedges(l_node) && i != l_n_node-1) return 0;
262 /** Serch for all nodes of a loop.
264 * @param *node The induction variable of the loop.
265 * @param *loop_head The head of the loop.
266 * @param *l_n A set, where the node of the loop are saved.
269 find_loop_nodes(ir_node *node, ir_node *loop_head, set *l_n)
271 copies_t key, *value;
273 /* Add this node to the list. */
275 for(int i = 0; i < 4 ;i++)
277 value = set_insert(l_n, &key, sizeof(key), HASH_PTR(key.irn));
279 /* Add all outs of this node to the list, if they are within the loop. */
280 for (int i = get_irn_n_outs(node) - 1; i >= 0; i--) {
281 ir_node *pred = get_irn_out(node, i);
283 if (!is_loop_invariant(pred, loop_head) &&
284 set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
285 find_loop_nodes(pred, loop_head, l_n);
289 /* Add all ins if they are within the loop. */
290 for(int i = get_irn_arity(node) -1; i >=0; i--) {
291 ir_node *pred = get_irn_n(node, i);
293 if (!is_loop_invariant(pred, loop_head) &&
294 set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ){
295 find_loop_nodes(pred, loop_head, l_n);
300 /* Make a new loop head if copy_loop_head = 1.
302 * @param *l_n A set, where the node of the loop are saved.
303 * @param *info Contains the loop information.
304 * @param *value A element of the set.
305 * @param *unroll_factor A integer power of 2.
309 new_loop_head (set *l_n, induct_var_info *info, copies_t *value, int unroll_factor)
311 copies_t block, *value_backedge_jmp, *backedge_jmp_block;
313 ir_node *backedge_jmp = get_Block_cfgpred(value->irn, info->op_pred_pos);
314 block.irn = backedge_jmp;
316 value_backedge_jmp = set_find( l_n, &block, sizeof(block), HASH_PTR(block.irn));
318 ir_node *new_loop_head = new_Block(1, &backedge_jmp);
319 value->copy[0] = new_loop_head;
321 for(int i = 1; i<unroll_factor - 1; i++){
322 ir_node *new_loop_head1 = new_Block(1, &value_backedge_jmp->copy[i-1]);
323 value->copy[i] = new_loop_head1;
327 block.irn = get_nodes_block(backedge_jmp);
328 backedge_jmp_block = set_find( l_n, &block, sizeof(block), HASH_PTR(block.irn));
330 set_irn_n(backedge_jmp_block->copy[0], 0, value_backedge_jmp->irn) ;
332 for(int i = 1; i<unroll_factor - 1; i++)
333 set_irn_n(backedge_jmp_block->copy[i], 0, value_backedge_jmp->copy[i - 1]);
338 /* Set all copies of the induction variable.
340 * @param *phi A phi node in the loop head block.
341 * @param *phi_pred The predecessor of the phi along the backedge.
342 * @param *unroll_factor A integer power of 2.
346 set_Phi_copies(copies_t *phi, copies_t *phi_pred, int unroll_factor)
348 phi->copy[0] = phi_pred->irn;
349 for(int p = 1; p < unroll_factor -1; p++)
350 phi->copy[p] = phi_pred->copy[p -1];
353 /* Decide if the loop head to be copy. A head with important nodes
356 * @param *l_n A set, where the node of the loop are saved.
357 * @param *info Contains information about the induction variable.
360 loop_head_nodes(set *l_n, induct_var_info *info)
363 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
365 for (value = set_first(l_n); value != NULL; value = set_next(l_n))
366 if(value->irn->op != op_Block &&
367 get_nodes_block(value->irn) == loop_head)
368 switch(get_irn_opcode(value->irn)) {
384 /** Copy all loop nodes.
386 * @param *l_n Contains all nodes of the loop.
387 * @param *info Contains the loop information.
388 * @param *unroll_factor A integer power of 2.
391 copy_loop_body(set *l_n, induct_var_info *info, int unroll_factor)
393 copies_t *value, *info_op, *phi, *loop_h, key, *value1;
395 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
398 for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
399 if(value->irn == loop_head)
401 else if (is_Phi_in_loop_head(value->irn, loop_head))
403 else if(copy_loop_head){
404 for (int i = 0; i<unroll_factor -1; i++){
405 copy_node(value->irn, NULL);
406 value->copy[i] = get_irn_link(value->irn);
409 if((value->irn->op == op_Block &&
410 value->irn != loop_head) ||
411 (value->irn->op != op_Block &&
412 get_nodes_block(value->irn) != loop_head))
413 for (int i = 0; i<unroll_factor -1; i++){
414 copy_node(value->irn, NULL);
415 value->copy[i] = get_irn_link(value->irn);
419 /* Treat the loop head block */
420 new_loop_head (l_n, info, loop_h, unroll_factor);
422 /* Similarily treat the Phis in the loop head block. */
424 info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
425 assert(info_op->irn == get_Phi_pred(info->itervar_phi, info->op_pred_pos));
426 for (int i = 0; i < get_irn_n_outs(loop_head); ++i) {
427 ir_node *phi = get_irn_out(loop_head, i);
430 key.irn = get_Phi_pred(phi, info->op_pred_pos); // info->op;
431 copies_t *phi_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
433 copies_t *phi_op = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
434 set_Phi_copies(phi_op, phi_pred, unroll_factor);
439 for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
442 set_preds(l_n, value, info, unroll_factor, NULL);
443 else if((value->irn->op != op_Block) && get_nodes_block(value->irn) != loop_head)
444 set_preds(l_n, value, info, unroll_factor, NULL);
446 if (is_Phi_in_loop_head(value->irn, loop_head))
447 set_phi_backedge(l_n, value, info, unroll_factor);
449 if ((value->irn->op != op_Block) && !is_Phi_in_loop_head(value->irn, loop_head)) {
450 ir_node *nodes_block = get_nodes_block(value->irn);
452 if(!copy_loop_head && nodes_block == loop_head)
455 key.irn = nodes_block;
456 value1 = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
458 for(int p = 0; p < unroll_factor - 1; p++){
459 set_nodes_block(value->copy[p], value1->copy[p]);
460 //add_End_keepalive(get_irg_end(current_ir_graph), value->copy[p]);
467 set_loop_outs(set *l_n, induct_var_info *info, int unroll_factor)
469 copies_t *value, key;
470 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
472 value = set_first(l_n);
473 for( ; value != NULL; value = set_next(l_n))
474 if(value->irn != info->op && !is_Phi_in_loop_head(value->irn, loop_head) &&
475 get_irn_opcode(value->irn) != iro_Proj)
476 for(int i = 0; i < get_irn_n_outs(value->irn); i++){
477 key.irn = get_irn_out(value->irn, i);
478 if(set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL)
479 for(int p = 0; p < get_irn_arity(key.irn); p++)
480 if(value->irn == get_irn_n(key.irn, p))
481 set_irn_n (key.irn, p, value->copy[unroll_factor-2]);
486 /** Unroll the loop boby with a factor that must be power of two.
488 * @param *n A ir node.
489 * @param *env Free environment pointer.
491 static void do_loop_unroll(ir_node *n, void *env){
493 induct_var_info info;
494 info.itervar_phi = n;
495 int l_sons = 0, unroll_factor = 0;
497 /* The ir node must be a induction varible. */
499 if (get_irn_op (n) == op_Phi) {
500 if (is_induction_variable (&info) == NULL) return;
503 /* Brute force limiting of loop body size. */
504 if (get_loop_n_nodes(info.l_itervar_phi) > 2 ) return;
506 /* Only unroll loops that compare against a constant for exiting. */
507 if (info.cmp == NULL) return;
509 /* We only want to unroll innermost loops. */
510 l_sons = get_loop_n_sons (info.l_itervar_phi);
514 ir_node* cmp_out = get_irn_out(info.cmp, 0);
516 if(!is_Proj(cmp_out)) return;
517 if(get_irn_op(info.increment) != op_Const) return;
519 int cmp_typ = get_Proj_proj(cmp_out);
520 int init = get_tarval_long(get_Const_tarval
521 (get_Phi_pred(info.itervar_phi, info.init_pred_pos)));
522 int iter_end = get_tarval_long(get_Const_tarval(info.cmp_const));
523 int iter_increment = get_tarval_long(get_Const_tarval(info.increment));
524 int diff, iter_number;
532 iter_increment = iter_increment < 0 ? -iter_increment : iter_increment;
533 diff = iter_end - init;
535 if (diff == 0 || iter_increment == 0) return;
537 iter_number = diff/iter_increment;
538 if((cmp_typ == 3 || cmp_typ == 5) && (iter_end % iter_increment == 0))
541 if(iter_number % 4 == 0)
543 else if(iter_number % 3 == 0)
545 else if(iter_number % 2 == 0)
550 printf("\ninit %d,\n iter_end %d, \n diff %d, cmp_typ\n %d, \n unroll_factor %d", init, iter_end, diff, cmp_typ, unroll_factor);
552 // int unroll_factor = 4; /* Must be power of 2. */
553 assert(unroll_factor <= MAX_UNROLL);
557 loop_head = (is_natural_loop(&info)) ? get_loop_node(info.l_itervar_phi, 0) : NULL;
559 assert(loop_head != NULL && is_Block(loop_head));
561 /* We assume, the loop head has exactly one backedge. The position of
562 the backedge is in the following variable: */
564 backedge_pos = (is_backedge(loop_head, 0)) ? 0:1;
566 /* A set with the nodes to copy. */
568 loop_nodes = new_set(set_cmp, 8);
570 ir_node *backedge_jmp = get_Block_cfgpred(loop_head, backedge_pos);
572 find_loop_nodes(info.itervar_phi, get_loop_node(info.l_itervar_phi, 0), loop_nodes);
573 loop_head_nodes(loop_nodes, &info);
574 copy_loop_body(loop_nodes, &info, unroll_factor);
577 for (value = set_first(loop_nodes); value != NULL; value = set_next(loop_nodes)) {
578 if(value->irn == backedge_jmp)
579 set_Block_cfgpred(loop_head, backedge_pos, value->copy[unroll_factor-2]);
582 set_loop_outs(loop_nodes, &info, unroll_factor);
585 if (needs_preloop(unroll_factor)) {
587 make_preloop(unroll_factor);
592 // adapt_result_usage();
596 /* Performs loop unrolling for the passed graph. */
597 void optimize_loop_unrolling(ir_graph *irg /* unroll factor, max body size */) {
598 ir_graph *rem = current_ir_graph;
599 current_ir_graph = irg;
601 if ( !get_opt_loop_unrolling()) return;
603 /* -- Precompute some information -- */
604 /* Call algorithm that computes the backedges */
605 construct_cf_backedges(irg);
606 /* Call algorithm that computes the dominator trees. */
608 /* Call algorithm that computes the out edges */
610 collect_phiprojs(irg);
612 /* -- Search expressions that can be optimized -- */
613 irg_walk_graph(irg, NULL, do_loop_unroll, NULL);
615 current_ir_graph = rem;