3 * File name: ir/opt/opt_osr.
4 * Purpose: Operator Strength Reduction,
5 * Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick
10 * Copyright: (c) 2006 Universität Karlsruhe
11 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
42 /** The debug handle. */
43 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
47 ir_node *head; /**< the head of the list */
51 typedef struct node_entry {
52 unsigned DFSnum; /**< the DFS number of this node */
53 unsigned low; /**< the low number of this node */
54 ir_node *header; /**< the header of this node */
55 int in_stack; /**< flag, set if the node is on the stack */
56 ir_node *next; /**< link to the next node the the same scc */
57 scc *pscc; /**< the scc of this node */
58 unsigned POnum; /**< the post order number for blocks */
61 /** The environment. */
62 typedef struct iv_env {
63 struct obstack obst; /**< an obstack for allocations */
64 ir_node **stack; /**< the node stack */
65 int tos; /**< tos index */
66 unsigned nextDFSnum; /**< the current DFS number */
67 unsigned POnum; /**< current post order number */
68 set *quad_map; /**< a map from (op, iv, rc) to node */
69 set *lftr_edges; /**< the set of lftr edges */
70 unsigned replaced; /**< number of replaced ops */
71 unsigned lftr_replaced; /**< number of applied linear function test replacements */
72 unsigned flags; /**< additional flags */
76 * An entry in the (op, node, node) -> node map.
78 typedef struct quad_t {
79 opcode code; /**< the opcode of the reduced operation */
80 ir_node *op1; /**< the first operand the reduced operation */
81 ir_node *op2; /**< the second operand of the reduced operation */
83 ir_node *res; /**< the reduced operation */
89 typedef struct LFTR_edge {
90 ir_node *src; /**< the source node */
91 ir_node *dst; /**< the destination node */
92 opcode code; /**< the opcode that must be applied */
93 ir_node *rc; /**< the region const that must be applied */
97 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
100 * Compare two LFTR edges.
102 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
103 const LFTR_edge *l1 = e1;
104 const LFTR_edge *l2 = e2;
106 return l1->src != l2->src;
112 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
117 return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
123 static void LFTR_add(ir_node *src, ir_node *dst, opcode code, ir_node *rc, iv_env *env) {
132 * There might be more than one edge here. This is rather bad
133 * because we currently store only one.
135 // assert(LFTR_find(src, env) == NULL);
136 set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
140 * Gets the node_entry of a node
142 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
143 node_entry *e = get_irn_link(irn);
146 e = obstack_alloc(&env->obst, sizeof(*e));
147 memset(e, 0, sizeof(*e));
148 set_irn_link(irn, e);
154 * Check if irn is an IV.
156 * @param irn the node to check
157 * @param env the environment
159 * @returns the header if it is one, NULL else
161 static ir_node *is_iv(ir_node *irn, iv_env *env) {
162 return get_irn_ne(irn, env)->header;
166 * Check if irn is a region constant.
167 * The block or irn must strictly dominate the header block.
169 * @param irn the node to check
170 * @param header_block the header block of the induction variable
172 static int is_rc(ir_node *irn, ir_node *header_block) {
173 ir_node *block = get_nodes_block(irn);
175 return (block != header_block) && block_dominates(block, header_block);
179 * Set compare function for the quad set.
181 static int quad_cmp(const void *e1, const void *e2, size_t size) {
182 const quad_t *c1 = e1;
183 const quad_t *c2 = e2;
185 return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
189 * Check if an reduced operation was already calculated.
191 * @param code the opcode of the operation
192 * @param op1 the first operand of the operation
193 * @param op2 the second operand of the operation
194 * @param env the environment
196 * @return the already reduced node or NULL if this operation is not yet reduced
198 static ir_node *search(opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
205 entry = set_find(env->quad_map, &key, sizeof(key),
206 (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
213 * Add an reduced operation.
215 * @param code the opcode of the operation
216 * @param op1 the first operand of the operation
217 * @param op2 the second operand of the operation
218 * @param result the result of the reduced operation
219 * @param env the environment
221 static void add(opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
229 set_insert(env->quad_map, &key, sizeof(key),
230 (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
234 * Find a location where to place a bin-op whose operands are in
237 * @param block1 the block of the first operand
238 * @param block2 the block of the second operand
240 * Note that we know here that such a place must exists. Moreover, this means
241 * that either block1 dominates block2 or vice versa. So, just return
244 static ir_node *find_location(ir_node *block1, ir_node *block2) {
245 if (block_dominates(block1, block2))
247 assert(block_dominates(block2, block1));
252 * Create a node that executes an op1 code op1 operation.
254 * @param code the opcode to execute
255 * @param db debug info to add to the new node
256 * @param op1 the first operand
257 * @param op2 the second operand
258 * @param mode the mode of the new operation
260 * @return the newly created node
262 static ir_node *do_apply(opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
263 ir_graph *irg = current_ir_graph;
265 ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
269 result = new_rd_Mul(db, irg, block, op1, op2, mode);
272 result = new_rd_Add(db, irg, block, op1, op2, mode);
275 result = new_rd_Sub(db, irg, block, op1, op2, mode);
285 * The Apply operation.
287 * @param orig the node that represent the original operation and determines
288 * the opcode, debug-info and mode of a newly created one
289 * @param op1 the first operand
290 * @param op2 the second operand
291 * @param env the environment
293 * @return the newly created node
295 static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
296 opcode code = get_irn_opcode(orig);
297 ir_node *result = search(code, op1, op2, env);
300 dbg_info *db = get_irn_dbg_info(orig);
301 ir_node *op1_header = get_irn_ne(op1, env)->header;
302 ir_node *op2_header = get_irn_ne(op2, env)->header;
304 if (op1_header != NULL && is_rc(op2, op1_header)) {
305 result = reduce(orig, op1, op2, env);
307 else if (op2_header != NULL && is_rc(op1, op2_header)) {
308 result = reduce(orig, op2, op1, env);
311 result = do_apply(code, db, op1, op2, get_irn_mode(orig));
312 get_irn_ne(result, env)->header = NULL;
319 * The Reduce operation.
321 * @param orig the node that represent the original operation and determines
322 * the opcode, debug-info and mode of a newly created one
323 * @param iv the induction variable
324 * @param rc the region constant
325 * @param env the environment
327 * @return the reduced node
329 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
330 opcode code = get_irn_opcode(orig);
331 ir_node *result = search(code, iv, rc, env);
334 node_entry *e, *iv_e;
336 ir_mode *mode = get_irn_mode(orig);
338 result = exact_copy(iv);
339 if (mode_is_reference(mode)) {
340 /* bad case: we replace a reference mode calculation.
341 assure that the new IV will be a reference one */
342 set_irn_mode(result, mode);
344 add(code, iv, rc, result, env);
345 DB((dbg, LEVEL_3, " Created new %+F for %+F (%s %+F)\n", result, iv,
346 get_irn_opname(orig), rc));
348 iv_e = get_irn_ne(iv, env);
349 e = get_irn_ne(result, env);
350 e->header = iv_e->header;
352 /* create the LFTR edge */
353 LFTR_add(iv, result, code, rc, env);
355 n = get_irn_arity(result);
356 for (i = 0; i < n; ++i) {
357 ir_node *o = get_irn_n(result, i);
359 e = get_irn_ne(o, env);
360 if (e->header == iv_e->header)
361 o = reduce(orig, o, rc, env);
362 else if (is_Phi(result))
363 o = apply(orig, o, rc, env);
367 o = apply(orig, o, rc, env);
371 set_irn_n(result, i, o);
375 DB((dbg, LEVEL_3, " Already Created %+F for %+F (%s %+F)\n", result, iv,
376 get_irn_opname(orig), rc));
382 * The Replace operation.
384 * @param irn the node that will be replaced
385 * @param iv the induction variable
386 * @param rc the region constant
387 * @param env the environment
389 static void replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
392 DB((dbg, LEVEL_2, " Replacing %+F\n", irn));
394 result = reduce(irn, iv, rc, env);
396 node_entry *e, *iv_e;
398 exchange(irn, result);
399 e = get_irn_ne(result, env);
400 iv_e = get_irn_ne(iv, env);
401 e->header = iv_e->header;
406 * Check if a node can be replaced (+, -, *).
408 * @param irn the node to check
409 * @param env the environment
411 * @return non-zero if irn should be Replace'd
413 static int check_replace(ir_node *irn, iv_env *env) {
414 ir_node *left, *right, *iv, *rc;
415 ir_op *op = get_irn_op(irn);
416 opcode code = get_op_code(op);
425 left = get_binop_left(irn);
426 right = get_binop_right(irn);
428 liv = is_iv(left, env);
429 riv = is_iv(right, env);
430 if (liv && is_rc(right, liv)) {
431 iv = left; rc = right;
433 else if (riv && is_op_commutative(op) &&
435 iv = right; rc = left;
439 replace(irn, iv, rc, env);
449 * Check which SCC's are induction variables.
452 * @param env the environment
454 static void classify_iv(scc *pscc, iv_env *env) {
455 ir_node *irn, *next, *header = NULL;
459 /* find the header block for this scc */
460 for (irn = pscc->head; irn; irn = next) {
461 node_entry *e = get_irn_link(irn);
462 ir_node *block = get_nodes_block(irn);
465 b = get_irn_ne(block, env);
468 if (h->POnum < b->POnum) {
479 /* check if this scc contains only Phi, Add or Sub nodes */
480 for (irn = pscc->head; irn; irn = next) {
481 node_entry *e = get_irn_ne(irn, env);
484 switch (get_irn_opcode(irn)) {
488 for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
489 ir_node *pred = get_irn_n(irn, j);
490 node_entry *pe = get_irn_ne(pred, env);
492 if (pe->pscc != e->pscc) {
493 /* not in the same SCC, must be a region const */
494 if (! is_rc(pred, header)) {
495 /* not an induction variable */
502 /* not an induction variable */
506 /* found an induction variable */
507 DB((dbg, LEVEL_2, " Found an induction variable:\n "));
509 /* set the header for every node in this scc */
510 for (irn = pscc->head; irn; irn = next) {
511 node_entry *e = get_irn_ne(irn, env);
514 DB((dbg, LEVEL_2, " %+F,", irn));
516 DB((dbg, LEVEL_2, "\n"));
520 for (irn = pscc->head; irn; irn = next) {
521 node_entry *e = get_irn_ne(irn, env);
524 if (! check_replace(irn, env))
532 * @param pscc the SCC
533 * @param env the environment
535 static void process_scc(scc *pscc, iv_env *env) {
536 ir_node *head = pscc->head;
537 node_entry *e = get_irn_link(head);
543 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
544 for (irn = pscc->head; irn; irn = next) {
545 node_entry *e = get_irn_link(irn);
549 DB((dbg, LEVEL_4, " %+F,", irn));
551 DB((dbg, LEVEL_4, "\n"));
555 if (e->next == NULL) {
556 /* this SCC has only a single member */
557 check_replace(head, env);
560 classify_iv(pscc, env);
565 * Push a node onto the stack.
567 * @param env the environment
568 * @param n the node to push
570 static void push(iv_env *env, ir_node *n) {
573 if (env->tos == ARR_LEN(env->stack)) {
574 int nlen = ARR_LEN(env->stack) * 2;
575 ARR_RESIZE(ir_node *, env->stack, nlen);
577 env->stack[env->tos++] = n;
578 e = get_irn_ne(n, env);
583 * pop a node from the stack
585 * @param env the environment
587 * @return The topmost node
589 static ir_node *pop(iv_env *env)
591 ir_node *n = env->stack[--env->tos];
592 node_entry *e = get_irn_ne(n, env);
599 * Do Tarjan's SCC algorithm and drive OSR.
601 * @param irn start at this node
602 * @param env the environment
604 static void dfs(ir_node *irn, iv_env *env)
607 node_entry *node = get_irn_ne(irn, env);
609 mark_irn_visited(irn);
611 /* do not put blocks into the scc */
613 n = get_irn_arity(irn);
614 for (i = 0; i < n; ++i) {
615 ir_node *pred = get_irn_n(irn, i);
617 if (irn_not_visited(pred))
622 ir_node *block = get_nodes_block(irn);
624 node->DFSnum = env->nextDFSnum++;
625 node->low = node->DFSnum;
628 /* handle the block */
629 if (irn_not_visited(block))
632 n = get_irn_arity(irn);
633 for (i = 0; i < n; ++i) {
634 ir_node *pred = get_irn_n(irn, i);
635 node_entry *o = get_irn_ne(pred, env);
637 if (irn_not_visited(pred)) {
639 node->low = MIN(node->low, o->low);
641 if (o->DFSnum < node->DFSnum && o->in_stack)
642 node->low = MIN(o->DFSnum, node->low);
644 if (node->low == node->DFSnum) {
645 scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
653 e = get_irn_ne(x, env);
655 e->next = pscc->head;
659 process_scc(pscc, env);
665 * Do the DFS by starting at the End node of a graph.
667 * @param irg the graph to process
668 * @param env the environment
670 static void do_dfs(ir_graph *irg, iv_env *env) {
671 ir_graph *rem = current_ir_graph;
672 ir_node *end = get_irg_end(irg);
675 current_ir_graph = irg;
676 inc_irg_visited(irg);
678 /* visit all visible nodes */
681 /* visit the keep-alives */
682 n = get_End_n_keepalives(end);
683 for (i = 0; i < n; ++i) {
684 ir_node *ka = get_End_keepalive(end, i);
686 if (irn_not_visited(ka))
690 current_ir_graph = rem;
694 * Post-block-walker: assign the post-order number.
696 static void assign_po(ir_node *block, void *ctx) {
698 node_entry *e = get_irn_ne(block, env);
700 e->POnum = env->POnum++;
704 * Follows the LFTR edges and return the last node in the chain.
706 * @param irn the node that should be followed
707 * @param env the IV environment
710 * In the current implementation only the last edge is stored, so
711 * only one chain exists. That's why we might miss some opportunities.
713 static ir_node *followEdges(ir_node *irn, iv_env *env) {
715 LFTR_edge *e = LFTR_find(irn, env);
724 * Apply one LFTR edge operation.
725 * Return NULL if the transformation cannot be done safely without
728 * @param rc the IV node that should be translated
729 * @param e the LFTR edge
730 * @param env the IV environment
732 * @return the translated region constant or NULL
733 * if the translation was not possible
736 * In the current implementation only the last edge is stored, so
737 * only one chain exists. That's why we might miss some opportunities.
739 static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
740 if (env->flags & osr_flag_lftr_with_ov_check) {
741 tarval *tv_l, *tv_r, *tv;
742 tarval_int_overflow_mode_t ovmode;
744 /* overflow can only be decided for Consts */
745 if (! is_Const(e->rc)) {
746 DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
750 tv_l = get_Const_tarval(rc);
751 tv_r = get_Const_tarval(e->rc);
753 ovmode = tarval_get_integer_overflow_mode();
754 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
758 tv = tarval_mul(tv_l, tv_r);
759 DB((dbg, LEVEL_4, " * %+F", tv_r));
762 tv = tarval_add(tv_l, tv_r);
763 DB((dbg, LEVEL_4, " + %+F", tv_r));
766 tv = tarval_sub(tv_l, tv_r);
767 DB((dbg, LEVEL_4, " - %+F", tv_r));
773 tarval_set_integer_overflow_mode(ovmode);
775 if (tv == tarval_bad) {
776 DB((dbg, LEVEL_4, " = OVERFLOW"));
779 return new_r_Const(current_ir_graph, get_irn_n(rc, -1), get_tarval_mode(tv), tv);
781 return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(rc));
785 * Applies the operations represented by the LFTR edges to a
786 * region constant and returns the value.
787 * Return NULL if the transformation cannot be done safely without
790 * @param iv the IV node that starts the LFTR edge chain
791 * @param rc the region constant that should be translated
792 * @param env the IV environment
794 * @return the translated region constant or NULL
795 * if the translation was not possible
797 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
800 if (env->flags & osr_flag_lftr_with_ov_check) {
801 /* overflow can only be decided for Consts */
802 if (! is_Const(rc)) {
803 DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
806 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
809 for (irn = iv; rc;) {
810 LFTR_edge *e = LFTR_find(irn, env);
812 rc = applyOneEdge(rc, e, env);
818 DB((dbg, LEVEL_3, "\n"));
823 * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
824 * and tries to optimize them.
826 static void do_lftr(ir_node *cmp, void *ctx) {
828 ir_node *left, *right, *liv, *riv;
830 ir_node *nleft = NULL, *nright = NULL;
832 if (get_irn_op(cmp) != op_Cmp)
835 left = get_Cmp_left(cmp);
836 right = get_Cmp_right(cmp);
838 liv = is_iv(left, env);
839 riv = is_iv(right, env);
840 if (liv && is_rc(right, liv)) {
841 iv = left; rc = right;
843 nright = applyEdges(iv, rc, env);
845 nleft = followEdges(iv, env);
848 else if (riv && is_rc(left, riv)) {
849 iv = right; rc = left;
851 nleft = applyEdges(iv, rc, env);
853 nright = followEdges(iv, env);
857 if (nleft && nright) {
858 DB((dbg, LEVEL_2, " LFTR for %+F\n", cmp));
859 set_Cmp_left(cmp, nleft);
860 set_Cmp_right(cmp, nright);
861 ++env->lftr_replaced;
866 * do linear function test replacement.
868 * @param irg the graph that should be optimized
869 * @param env the IV environment
871 static void lftr(ir_graph *irg, iv_env *env) {
872 irg_walk_graph(irg, NULL, do_lftr, env);
876 * Pre-walker: set all node links to NULL and fix the
877 * block of Proj nodes.
879 static void clear_and_fix(ir_node *irn, void *env)
881 set_irn_link(irn, NULL);
884 ir_node *pred = get_Proj_pred(irn);
885 set_irn_n(irn, -1, get_irn_n(pred, -1));
889 /* Performs Operator Strength Reduction for the passed graph. */
890 void opt_osr(ir_graph *irg, unsigned flags) {
893 if (! get_opt_strength_red())
896 FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
897 // firm_dbg_set_mask(dbg, SET_LEVEL_3);
899 DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
901 obstack_init(&env.obst);
902 env.stack = NEW_ARR_F(ir_node *, 128);
906 env.quad_map = new_set(quad_cmp, 64);
907 env.lftr_edges = new_set(LFTR_cmp, 64);
909 env.lftr_replaced = 0;
912 /* Clear all links and move Proj nodes into the
913 the same block as it's predecessors.
914 This can improve the placement of new nodes.
916 irg_walk_graph(irg, NULL, clear_and_fix, NULL);
918 /* we need dominance */
920 assure_irg_outs(irg);
922 /* calculate the post order number for blocks. */
923 irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
925 /* calculate the SCC's and drive OSR. */
929 /* try linear function test replacements */
932 set_irg_outs_inconsistent(irg);
933 set_irg_loopinfo_inconsistent(irg);
935 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
938 del_set(env.lftr_edges);
939 del_set(env.quad_map);
940 DEL_ARR_F(env.stack);
941 obstack_free(&env.obst, NULL);