3 * @file loop_unrolling.c
6 * File name: ir/opt/loop_unrolling.c
7 * Purpose: Make loop unrolling.
8 * Author: Beyhan Veliev
12 * Copyright: (c) 2004 Universität Karlsruhe
13 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
17 # include "loop_unrolling.h"
22 # include "irloop_t.h"
23 # include "irgopt_t.h"
24 # include "irnode_t.h"
28 # include "strength_red.h"
32 /*We will know if the head of the loop to be copy.
33 * Default don't copy.*/
34 static int copy_loop_head = 0;
37 ir_node *irn ; /* Node of the loop to be unrolling*/
38 ir_node *copy[MAX_UNROLL] ; /* The copy of the node */
42 * compare two elements of the copies_t set
44 static int set_cmp(const void *elt, const void *key, size_t size)
46 const copies_t *c1 = elt;
47 const copies_t *c2 = key;
49 return c1->irn != c2->irn;
52 static INLINE int * new_backedge_arr(struct obstack *obst, int size)
54 int *res = NEW_ARR_D (int, obst, size);
55 memset(res, 0, sizeof(int) * size);
59 static INLINE void new_backedge_info(ir_node *n) {
60 switch(get_irn_opcode(n)) {
62 n->attr.block.cg_backedge = NULL;
63 n->attr.block.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
66 n->attr.phi_backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
69 n->attr.filter.backedge = new_backedge_arr(current_ir_graph->obst, get_irn_arity(n));
76 * Remember the new node in the old node by using a field all nodes have.
80 set_new_node (ir_node *old, ir_node *new)
86 * Copies the node to the new obstack. The Ins of the new node point to
87 * the predecessors on the old obstack. For block/phi nodes not all
88 * predecessors might be copied. n->link points to the new node.
89 * For Phi and Block nodes the function allocates in-arrays with an arity
90 * only for useful predecessors. The arity is determined by counting
91 * the non-bad predecessors of the block.
93 * @param n The node to be copied
94 * @param env if non-NULL, the node number attribute will be copied to the new node
98 copy_node (ir_node *n, void *env)
102 opcode op = get_irn_opcode(n);
103 int copy_node_nr = env != NULL;
105 /* The end node looses it's flexible in array. This doesn't matter,
106 as dead node elimination builds End by hand, inlineing doesn't use
108 /* assert(n->op == op_End || ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC)); */
111 /* node copied already */
114 new_arity = get_irn_arity(n);
116 nn = new_ir_node(get_irn_dbg_info(n),
125 /* Copy the attributes. These might point to additional data. If this
126 was allocated on the old obstack the pointers now are dangling. This
127 frees e.g. the memory of the graph_arr allocated in new_immBlock. */
128 copy_node_attr(n, nn);
129 new_backedge_info(nn);
134 /* for easier debugging, we want to copy the node numbers too */
135 nn->node_nr = n->node_nr;
141 static int is_Phi_in_loop_head(ir_node *phi, ir_node *loop_head) {
142 assert(is_Block(loop_head));
143 return is_Phi(phi) && (get_nodes_block(phi) == loop_head);
147 * Copies predecessors of node to copies of node.
148 * If the predecessor is a loop invariant, then the copy get it
149 * as predecessor, else the copy of the predecessor.
151 * @param *l_n A set, where the node of the loop are saved.
152 * @param *value A element of the set.
153 * @param *info Contains information about the induction variable.
154 * @param *unroll_factor A integer power of 2.
155 * @param *env Free environment pointer.
158 set_preds (set *l_n, copies_t *value, induct_var_info *info, int unroll_factor, void *env)
161 copies_t *value_pred;
163 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
165 irn_arity = get_irn_arity(value->irn);
167 for (i = 0; i < irn_arity; i++) {
168 ir_node *pred = get_irn_n(value->irn, i);
173 value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(pred));
175 if (value->irn != loop_head && !is_Phi_in_loop_head(value->irn, loop_head)) {
176 if (value_pred == NULL) {
177 /* Is loop invariant. */
178 for(p = 0; p < unroll_factor -1; p++)
179 set_irn_n (value->copy[p], i, pred);
182 for(p = 0; p < unroll_factor -1; p++)
183 set_irn_n (value->copy[p], i, value_pred->copy[p]);
188 /* Set the backedge of phi in the loop head.The backedge of phis in the loop head
189 * must now point to the value defined
190 * in the last copie of the loop body.
192 * @param *l_n A set, where the node of the loop are saved.
193 * @param *value A phi in the loop head.
194 * @param *info Contains information about the induction variable.
195 * @param *unroll_factor A integer power of 2.
198 set_phi_backedge(set *l_n, copies_t *value, induct_var_info *info, int unroll_factor)
200 copies_t key, *value_pred;
201 key.irn = get_irn_n(value->irn, info->op_pred_pos);
202 value_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
204 set_Phi_pred(value->irn, info->op_pred_pos, value_pred->copy[unroll_factor - 2]);
207 /** Test for a loop head.
209 * Returns true if the node has predecessors in the loop _and_ out of
210 * the loop. Then it is a loop head: The loop can be entered through
213 * @param *n The node to be tested.
214 * @param *info Contains the loop information.
217 is_loop_head(ir_node *n, induct_var_info *info)
220 int some_outof_loop = 0, some_in_loop = 0;
222 assert(get_irn_op(n) == op_Block);
223 arity = get_Block_n_cfgpreds(n);
225 for (i = 0; i < arity; i++) {
226 ir_node *pred = get_Block_cfgpred(n, i);
228 if (is_loop_invariant(pred, get_loop_node(info->l_itervar_phi, 0))) {
234 return some_outof_loop && some_in_loop;
238 /** Test wether the passed loop is a natural loop.
240 * Returns true if the loop has only one loop header and only a single
243 * @param *info Contains the loop information.
246 is_natural_loop ( induct_var_info *info)
250 l_n_node = get_loop_n_nodes (info->l_itervar_phi);
252 for (i = 1; i < (l_n_node); i ++) {
253 l_node = get_loop_node (info->l_itervar_phi, i);
254 if (is_loop_head(l_node, info)) return 0;
256 if (has_backedges(l_node) && i != l_n_node-1) return 0;
262 /** Serch for all nodes of a loop.
264 * @param *node The induction variable of the loop.
265 * @param *loop_head The head of the loop.
266 * @param *l_n A set, where the node of the loop are saved.
269 find_loop_nodes(ir_node *node, ir_node *loop_head, set *l_n)
272 copies_t key, *value;
274 /* Add this node to the list. */
276 for(i = 0; i < 4 ;i++)
278 value = set_insert(l_n, &key, sizeof(key), HASH_PTR(key.irn));
280 /* Add all outs of this node to the list, if they are within the loop. */
281 for (i = get_irn_n_outs(node) - 1; i >= 0; i--) {
282 ir_node *pred = get_irn_out(node, i);
284 if (!is_loop_invariant(pred, loop_head) &&
285 set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ) {
286 find_loop_nodes(pred, loop_head, l_n);
290 /* Add all ins if they are within the loop. */
291 for(i = get_irn_arity(node) -1; i >=0; i--) {
292 ir_node *pred = get_irn_n(node, i);
294 if (!is_loop_invariant(pred, loop_head) &&
295 set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL ){
296 find_loop_nodes(pred, loop_head, l_n);
301 /* Make a new loop head if copy_loop_head = 1.
303 * @param *l_n A set, where the node of the loop are saved.
304 * @param *info Contains the loop information.
305 * @param *value A element of the set.
306 * @param *unroll_factor A integer power of 2.
310 new_loop_head (set *l_n, induct_var_info *info, copies_t *value, int unroll_factor)
312 copies_t block, *value_backedge_jmp, *backedge_jmp_block;
315 ir_node *backedge_jmp = get_Block_cfgpred(value->irn, info->op_pred_pos);
316 block.irn = backedge_jmp;
318 value_backedge_jmp = set_find( l_n, &block, sizeof(block), HASH_PTR(block.irn));
320 ir_node *new_loop_head = new_Block(1, &backedge_jmp);
321 value->copy[0] = new_loop_head;
323 for(i = 1; i<unroll_factor - 1; i++){
324 ir_node *new_loop_head1 = new_Block(1, &value_backedge_jmp->copy[i-1]);
325 value->copy[i] = new_loop_head1;
329 block.irn = get_nodes_block(backedge_jmp);
330 backedge_jmp_block = set_find( l_n, &block, sizeof(block), HASH_PTR(block.irn));
332 set_irn_n(backedge_jmp_block->copy[0], 0, value_backedge_jmp->irn) ;
334 for(i = 1; i<unroll_factor - 1; i++)
335 set_irn_n(backedge_jmp_block->copy[i], 0, value_backedge_jmp->copy[i - 1]);
340 /* Set all copies of the induction variable.
342 * @param *phi A phi node in the loop head block.
343 * @param *phi_pred The predecessor of the phi along the backedge.
344 * @param *unroll_factor A integer power of 2.
348 set_Phi_copies(copies_t *phi, copies_t *phi_pred, int unroll_factor)
351 phi->copy[0] = phi_pred->irn;
352 for(p = 1; p < unroll_factor -1; p++)
353 phi->copy[p] = phi_pred->copy[p -1];
356 /* Decide if the loop head to be copy. A head with important nodes
359 * @param *l_n A set, where the node of the loop are saved.
360 * @param *info Contains information about the induction variable.
363 loop_head_nodes(set *l_n, induct_var_info *info)
366 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
368 for (value = set_first(l_n); value != NULL; value = set_next(l_n))
369 if(value->irn->op != op_Block &&
370 get_nodes_block(value->irn) == loop_head)
371 switch(get_irn_opcode(value->irn)) {
387 /** Copy all loop nodes.
389 * @param *l_n Contains all nodes of the loop.
390 * @param *info Contains the loop information.
391 * @param *unroll_factor A integer power of 2.
394 copy_loop_body(set *l_n, induct_var_info *info, int unroll_factor)
396 copies_t *value, *info_op, *phi, *loop_h, key, *value1;
398 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
401 for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
402 if(value->irn == loop_head)
404 else if (is_Phi_in_loop_head(value->irn, loop_head))
406 else if(copy_loop_head){
407 for (i = 0; i<unroll_factor -1; i++){
408 copy_node(value->irn, NULL);
409 value->copy[i] = get_irn_link(value->irn);
412 if((value->irn->op == op_Block &&
413 value->irn != loop_head) ||
414 (value->irn->op != op_Block &&
415 get_nodes_block(value->irn) != loop_head))
416 for (i = 0; i<unroll_factor -1; i++){
417 copy_node(value->irn, NULL);
418 value->copy[i] = get_irn_link(value->irn);
422 /* Treat the loop head block */
423 new_loop_head (l_n, info, loop_h, unroll_factor);
425 /* Similarily treat the Phis in the loop head block. */
427 info_op = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
428 assert(info_op->irn == get_Phi_pred(info->itervar_phi, info->op_pred_pos));
429 for (i = 0; i < get_irn_n_outs(loop_head); ++i) {
430 ir_node *phi = get_irn_out(loop_head, i);
433 key.irn = get_Phi_pred(phi, info->op_pred_pos); // info->op;
434 copies_t *phi_pred = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
436 copies_t *phi_op = set_find(l_n, &key, sizeof(key), HASH_PTR(key.irn));
437 set_Phi_copies(phi_op, phi_pred, unroll_factor);
442 for (value = set_first(l_n); value != NULL; value = set_next(l_n)) {
446 set_preds(l_n, value, info, unroll_factor, NULL);
447 else if((value->irn->op != op_Block) && get_nodes_block(value->irn) != loop_head)
448 set_preds(l_n, value, info, unroll_factor, NULL);
450 if (is_Phi_in_loop_head(value->irn, loop_head))
451 set_phi_backedge(l_n, value, info, unroll_factor);
453 if ((value->irn->op != op_Block) && !is_Phi_in_loop_head(value->irn, loop_head)) {
454 ir_node *nodes_block = get_nodes_block(value->irn);
456 if(!copy_loop_head && nodes_block == loop_head)
459 key.irn = nodes_block;
460 value1 = set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn));
462 for(p = 0; p < unroll_factor - 1; p++){
463 set_nodes_block(value->copy[p], value1->copy[p]);
464 //add_End_keepalive(get_irg_end(current_ir_graph), value->copy[p]);
471 set_loop_outs(set *l_n, induct_var_info *info, int unroll_factor)
473 copies_t *value, key;
475 ir_node *loop_head = get_loop_node(info->l_itervar_phi, 0);
477 value = set_first(l_n);
478 for( ; value != NULL; value = set_next(l_n))
479 if(value->irn != info->op && !is_Phi_in_loop_head(value->irn, loop_head) &&
480 get_irn_opcode(value->irn) != iro_Proj)
481 for(i = 0; i < get_irn_n_outs(value->irn); i++){
482 key.irn = get_irn_out(value->irn, i);
483 if(set_find( l_n, &key, sizeof(key), HASH_PTR(key.irn)) == NULL)
484 for(p = 0; p < get_irn_arity(key.irn); p++)
485 if(value->irn == get_irn_n(key.irn, p))
486 set_irn_n (key.irn, p, value->copy[unroll_factor-2]);
491 /** Unroll the loop boby with a factor that must be power of two.
493 * @param *n A ir node.
494 * @param *env Free environment pointer.
496 static void do_loop_unroll(ir_node *n, void *env){
498 induct_var_info info;
499 info.itervar_phi = n;
500 int l_sons = 0, unroll_factor = 0;
502 /* The ir node must be a induction varible. */
504 if (get_irn_op (n) == op_Phi) {
505 if (is_induction_variable (&info) == NULL) return;
508 /* Brute force limiting of loop body size. */
509 if (get_loop_n_nodes(info.l_itervar_phi) > 2 ) return;
511 /* Only unroll loops that compare against a constant for exiting. */
512 if (info.cmp == NULL) return;
514 /* We only want to unroll innermost loops. */
515 l_sons = get_loop_n_sons (info.l_itervar_phi);
519 ir_node* cmp_out = get_irn_out(info.cmp, 0);
521 if(!is_Proj(cmp_out)) return;
522 if(get_irn_op(info.increment) != op_Const) return;
524 int cmp_typ = get_Proj_proj(cmp_out);
525 int init = get_tarval_long(get_Const_tarval
526 (get_Phi_pred(info.itervar_phi, info.init_pred_pos)));
527 int iter_end = get_tarval_long(get_Const_tarval(info.cmp_const));
528 int iter_increment = get_tarval_long(get_Const_tarval(info.increment));
529 int diff, iter_number;
537 iter_increment = iter_increment < 0 ? -iter_increment : iter_increment;
538 diff = iter_end - init;
540 if (diff == 0 || iter_increment == 0) return;
542 iter_number = diff/iter_increment;
543 if((cmp_typ == 3 || cmp_typ == 5) && (iter_end % iter_increment == 0))
546 if(iter_number % 4 == 0)
548 else if(iter_number % 3 == 0)
550 else if(iter_number % 2 == 0)
555 printf("\ninit %d,\n iter_end %d, \n diff %d, cmp_typ\n %d, \n unroll_factor %d", init, iter_end, diff, cmp_typ, unroll_factor);
557 // int unroll_factor = 4; /* Must be power of 2. */
558 assert(unroll_factor <= MAX_UNROLL);
562 loop_head = (is_natural_loop(&info)) ? get_loop_node(info.l_itervar_phi, 0) : NULL;
564 assert(loop_head != NULL && is_Block(loop_head));
566 /* We assume, the loop head has exactly one backedge. The position of
567 the backedge is in the following variable: */
569 backedge_pos = (is_backedge(loop_head, 0)) ? 0:1;
571 /* A set with the nodes to copy. */
573 loop_nodes = new_set(set_cmp, 8);
575 ir_node *backedge_jmp = get_Block_cfgpred(loop_head, backedge_pos);
577 find_loop_nodes(info.itervar_phi, get_loop_node(info.l_itervar_phi, 0), loop_nodes);
578 loop_head_nodes(loop_nodes, &info);
579 copy_loop_body(loop_nodes, &info, unroll_factor);
582 for (value = set_first(loop_nodes); value != NULL; value = set_next(loop_nodes)) {
583 if(value->irn == backedge_jmp)
584 set_Block_cfgpred(loop_head, backedge_pos, value->copy[unroll_factor-2]);
587 set_loop_outs(loop_nodes, &info, unroll_factor);
590 if (needs_preloop(unroll_factor)) {
592 make_preloop(unroll_factor);
597 // adapt_result_usage();
601 /* Performs loop unrolling for the passed graph. */
602 void optimize_loop_unrolling(ir_graph *irg /* unroll factor, max body size */) {
603 ir_graph *rem = current_ir_graph;
604 current_ir_graph = irg;
606 if ( !get_opt_loop_unrolling()) return;
608 /* -- Precompute some information -- */
609 /* Call algorithm that computes the backedges */
610 construct_cf_backedges(irg);
611 /* Call algorithm that computes the dominator trees. */
613 /* Call algorithm that computes the out edges */
615 collect_phiprojs(irg);
617 /* -- Search expressions that can be optimized -- */
618 irg_walk_graph(irg, NULL, do_loop_unroll, NULL);
620 current_ir_graph = rem;