3 * File name: ir/opt/opt_osr.
4 * Purpose: Operator Strength Reduction,
5 * Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick
10 * Copyright: (c) 2006 Universität Karlsruhe
11 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
43 /** The debug handle. */
44 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
48 ir_node *head; /**< the head of the list */
52 typedef struct node_entry {
53 unsigned DFSnum; /**< the DFS number of this node */
54 unsigned low; /**< the low number of this node */
55 ir_node *header; /**< the header of this node */
56 int in_stack; /**< flag, set if the node is on the stack */
57 ir_node *next; /**< link to the next node the the same scc */
58 scc *pscc; /**< the scc of this node */
59 unsigned POnum; /**< the post order number for blocks */
62 /** The environment. */
63 typedef struct iv_env {
64 struct obstack obst; /**< an obstack for allocations */
65 ir_node **stack; /**< the node stack */
66 int tos; /**< tos index */
67 unsigned nextDFSnum; /**< the current DFS number */
68 unsigned POnum; /**< current post order number */
69 set *quad_map; /**< a map from (op, iv, rc) to node */
70 set *lftr_edges; /**< the set of lftr edges */
71 unsigned replaced; /**< number of replaced ops */
72 unsigned lftr_replaced; /**< number of applied linear function test replacements */
73 unsigned flags; /**< additional flags */
77 * An entry in the (op, node, node) -> node map.
79 typedef struct quad_t {
80 opcode code; /**< the opcode of the reduced operation */
81 ir_node *op1; /**< the first operand the reduced operation */
82 ir_node *op2; /**< the second operand of the reduced operation */
84 ir_node *res; /**< the reduced operation */
90 typedef struct LFTR_edge {
91 ir_node *src; /**< the source node */
92 ir_node *dst; /**< the destination node */
93 opcode code; /**< the opcode that must be applied */
94 ir_node *rc; /**< the region const that must be applied */
98 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
101 * Compare two LFTR edges.
103 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
104 const LFTR_edge *l1 = e1;
105 const LFTR_edge *l2 = e2;
107 return l1->src != l2->src;
113 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
118 return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
124 static void LFTR_add(ir_node *src, ir_node *dst, opcode code, ir_node *rc, iv_env *env) {
133 * There might be more than one edge here. This is rather bad
134 * because we currently store only one.
136 // assert(LFTR_find(src, env) == NULL);
137 set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
141 * Gets the node_entry of a node
143 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
144 node_entry *e = get_irn_link(irn);
147 e = obstack_alloc(&env->obst, sizeof(*e));
148 memset(e, 0, sizeof(*e));
149 set_irn_link(irn, e);
155 * Check if irn is an IV.
157 * @param irn the node to check
158 * @param env the environment
160 * @returns the header if it is one, NULL else
162 static ir_node *is_iv(ir_node *irn, iv_env *env) {
163 return get_irn_ne(irn, env)->header;
167 * Check if irn is a region constant.
168 * The block or irn must strictly dominate the header block.
170 * @param irn the node to check
171 * @param header_block the header block of the induction variable
173 static int is_rc(ir_node *irn, ir_node *header_block) {
174 ir_node *block = get_nodes_block(irn);
176 return (block != header_block) && block_dominates(block, header_block);
180 * Set compare function for the quad set.
182 static int quad_cmp(const void *e1, const void *e2, size_t size) {
183 const quad_t *c1 = e1;
184 const quad_t *c2 = e2;
186 return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
190 * Check if an reduced operation was already calculated.
192 * @param code the opcode of the operation
193 * @param op1 the first operand of the operation
194 * @param op2 the second operand of the operation
195 * @param env the environment
197 * @return the already reduced node or NULL if this operation is not yet reduced
199 static ir_node *search(opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
206 entry = set_find(env->quad_map, &key, sizeof(key),
207 (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
214 * Add an reduced operation.
216 * @param code the opcode of the operation
217 * @param op1 the first operand of the operation
218 * @param op2 the second operand of the operation
219 * @param result the result of the reduced operation
220 * @param env the environment
222 static void add(opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
230 set_insert(env->quad_map, &key, sizeof(key),
231 (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
235 * Find a location where to place a bin-op whose operands are in
238 * @param block1 the block of the first operand
239 * @param block2 the block of the second operand
241 * Note that we know here that such a place must exists. Moreover, this means
242 * that either block1 dominates block2 or vice versa. So, just return
245 static ir_node *find_location(ir_node *block1, ir_node *block2) {
246 if (block_dominates(block1, block2))
248 assert(block_dominates(block2, block1));
253 * Create a node that executes an op1 code op1 operation.
255 * @param code the opcode to execute
256 * @param db debug info to add to the new node
257 * @param op1 the first operand
258 * @param op2 the second operand
259 * @param mode the mode of the new operation
261 * @return the newly created node
263 static ir_node *do_apply(opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
264 ir_graph *irg = current_ir_graph;
266 ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
270 result = new_rd_Mul(db, irg, block, op1, op2, mode);
273 result = new_rd_Add(db, irg, block, op1, op2, mode);
276 result = new_rd_Sub(db, irg, block, op1, op2, mode);
286 * The Apply operation.
288 * @param orig the node that represent the original operation and determines
289 * the opcode, debug-info and mode of a newly created one
290 * @param op1 the first operand
291 * @param op2 the second operand
292 * @param env the environment
294 * @return the newly created node
296 static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
297 opcode code = get_irn_opcode(orig);
298 ir_node *result = search(code, op1, op2, env);
301 dbg_info *db = get_irn_dbg_info(orig);
302 ir_node *op1_header = get_irn_ne(op1, env)->header;
303 ir_node *op2_header = get_irn_ne(op2, env)->header;
305 if (op1_header != NULL && is_rc(op2, op1_header)) {
306 result = reduce(orig, op1, op2, env);
308 else if (op2_header != NULL && is_rc(op1, op2_header)) {
309 result = reduce(orig, op2, op1, env);
312 result = do_apply(code, db, op1, op2, get_irn_mode(orig));
313 get_irn_ne(result, env)->header = NULL;
320 * The Reduce operation.
322 * @param orig the node that represent the original operation and determines
323 * the opcode, debug-info and mode of a newly created one
324 * @param iv the induction variable
325 * @param rc the region constant
326 * @param env the environment
328 * @return the reduced node
330 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
331 opcode code = get_irn_opcode(orig);
332 ir_node *result = search(code, iv, rc, env);
335 node_entry *e, *iv_e;
337 ir_mode *mode = get_irn_mode(orig);
339 result = exact_copy(iv);
340 if (mode_is_reference(mode)) {
341 /* bad case: we replace a reference mode calculation.
342 assure that the new IV will be a reference one */
343 set_irn_mode(result, mode);
345 add(code, iv, rc, result, env);
346 DB((dbg, LEVEL_3, " Created new %+F for %+F (%s %+F)\n", result, iv,
347 get_irn_opname(orig), rc));
349 iv_e = get_irn_ne(iv, env);
350 e = get_irn_ne(result, env);
351 e->header = iv_e->header;
353 /* create the LFTR edge */
354 LFTR_add(iv, result, code, rc, env);
356 n = get_irn_arity(result);
357 for (i = 0; i < n; ++i) {
358 ir_node *o = get_irn_n(result, i);
360 e = get_irn_ne(o, env);
361 if (e->header == iv_e->header)
362 o = reduce(orig, o, rc, env);
363 else if (is_Phi(result))
364 o = apply(orig, o, rc, env);
368 o = apply(orig, o, rc, env);
372 set_irn_n(result, i, o);
376 DB((dbg, LEVEL_3, " Already Created %+F for %+F (%s %+F)\n", result, iv,
377 get_irn_opname(orig), rc));
383 * The Replace operation.
385 * @param irn the node that will be replaced
386 * @param iv the induction variable
387 * @param rc the region constant
388 * @param env the environment
390 static void replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
393 DB((dbg, LEVEL_2, " Replacing %+F\n", irn));
395 result = reduce(irn, iv, rc, env);
397 node_entry *e, *iv_e;
399 hook_strength_red(current_ir_graph, irn);
400 exchange(irn, result);
401 e = get_irn_ne(result, env);
402 iv_e = get_irn_ne(iv, env);
403 e->header = iv_e->header;
408 * Check if a node can be replaced (+, -, *).
410 * @param irn the node to check
411 * @param env the environment
413 * @return non-zero if irn should be Replace'd
415 static int check_replace(ir_node *irn, iv_env *env) {
416 ir_node *left, *right, *iv, *rc;
417 ir_op *op = get_irn_op(irn);
418 opcode code = get_op_code(op);
427 left = get_binop_left(irn);
428 right = get_binop_right(irn);
430 liv = is_iv(left, env);
431 riv = is_iv(right, env);
432 if (liv && is_rc(right, liv)) {
433 iv = left; rc = right;
435 else if (riv && is_op_commutative(op) &&
437 iv = right; rc = left;
441 replace(irn, iv, rc, env);
451 * Check which SCC's are induction variables.
454 * @param env the environment
456 static void classify_iv(scc *pscc, iv_env *env) {
457 ir_node *irn, *next, *header = NULL;
461 /* find the header block for this scc */
462 for (irn = pscc->head; irn; irn = next) {
463 node_entry *e = get_irn_link(irn);
464 ir_node *block = get_nodes_block(irn);
467 b = get_irn_ne(block, env);
470 if (h->POnum < b->POnum) {
481 /* check if this scc contains only Phi, Add or Sub nodes */
482 for (irn = pscc->head; irn; irn = next) {
483 node_entry *e = get_irn_ne(irn, env);
486 switch (get_irn_opcode(irn)) {
490 for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
491 ir_node *pred = get_irn_n(irn, j);
492 node_entry *pe = get_irn_ne(pred, env);
494 if (pe->pscc != e->pscc) {
495 /* not in the same SCC, must be a region const */
496 if (! is_rc(pred, header)) {
497 /* not an induction variable */
504 /* not an induction variable */
508 /* found an induction variable */
509 DB((dbg, LEVEL_2, " Found an induction variable:\n "));
511 /* set the header for every node in this scc */
512 for (irn = pscc->head; irn; irn = next) {
513 node_entry *e = get_irn_ne(irn, env);
516 DB((dbg, LEVEL_2, " %+F,", irn));
518 DB((dbg, LEVEL_2, "\n"));
522 for (irn = pscc->head; irn; irn = next) {
523 node_entry *e = get_irn_ne(irn, env);
526 if (! check_replace(irn, env))
534 * @param pscc the SCC
535 * @param env the environment
537 static void process_scc(scc *pscc, iv_env *env) {
538 ir_node *head = pscc->head;
539 node_entry *e = get_irn_link(head);
545 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
546 for (irn = pscc->head; irn; irn = next) {
547 node_entry *e = get_irn_link(irn);
551 DB((dbg, LEVEL_4, " %+F,", irn));
553 DB((dbg, LEVEL_4, "\n"));
557 if (e->next == NULL) {
558 /* this SCC has only a single member */
559 check_replace(head, env);
562 classify_iv(pscc, env);
567 * Push a node onto the stack.
569 * @param env the environment
570 * @param n the node to push
572 static void push(iv_env *env, ir_node *n) {
575 if (env->tos == ARR_LEN(env->stack)) {
576 int nlen = ARR_LEN(env->stack) * 2;
577 ARR_RESIZE(ir_node *, env->stack, nlen);
579 env->stack[env->tos++] = n;
580 e = get_irn_ne(n, env);
585 * pop a node from the stack
587 * @param env the environment
589 * @return The topmost node
591 static ir_node *pop(iv_env *env)
593 ir_node *n = env->stack[--env->tos];
594 node_entry *e = get_irn_ne(n, env);
601 * Do Tarjan's SCC algorithm and drive OSR.
603 * @param irn start at this node
604 * @param env the environment
606 static void dfs(ir_node *irn, iv_env *env)
609 node_entry *node = get_irn_ne(irn, env);
611 mark_irn_visited(irn);
613 /* do not put blocks into the scc */
615 n = get_irn_arity(irn);
616 for (i = 0; i < n; ++i) {
617 ir_node *pred = get_irn_n(irn, i);
619 if (irn_not_visited(pred))
624 ir_node *block = get_nodes_block(irn);
626 node->DFSnum = env->nextDFSnum++;
627 node->low = node->DFSnum;
630 /* handle the block */
631 if (irn_not_visited(block))
634 n = get_irn_arity(irn);
635 for (i = 0; i < n; ++i) {
636 ir_node *pred = get_irn_n(irn, i);
637 node_entry *o = get_irn_ne(pred, env);
639 if (irn_not_visited(pred)) {
641 node->low = MIN(node->low, o->low);
643 if (o->DFSnum < node->DFSnum && o->in_stack)
644 node->low = MIN(o->DFSnum, node->low);
646 if (node->low == node->DFSnum) {
647 scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
655 e = get_irn_ne(x, env);
657 e->next = pscc->head;
661 process_scc(pscc, env);
667 * Do the DFS by starting at the End node of a graph.
669 * @param irg the graph to process
670 * @param env the environment
672 static void do_dfs(ir_graph *irg, iv_env *env) {
673 ir_graph *rem = current_ir_graph;
674 ir_node *end = get_irg_end(irg);
677 current_ir_graph = irg;
678 inc_irg_visited(irg);
680 /* visit all visible nodes */
683 /* visit the keep-alives */
684 n = get_End_n_keepalives(end);
685 for (i = 0; i < n; ++i) {
686 ir_node *ka = get_End_keepalive(end, i);
688 if (irn_not_visited(ka))
692 current_ir_graph = rem;
696 * Post-block-walker: assign the post-order number.
698 static void assign_po(ir_node *block, void *ctx) {
700 node_entry *e = get_irn_ne(block, env);
702 e->POnum = env->POnum++;
706 * Follows the LFTR edges and return the last node in the chain.
708 * @param irn the node that should be followed
709 * @param env the IV environment
712 * In the current implementation only the last edge is stored, so
713 * only one chain exists. That's why we might miss some opportunities.
715 static ir_node *followEdges(ir_node *irn, iv_env *env) {
717 LFTR_edge *e = LFTR_find(irn, env);
726 * Apply one LFTR edge operation.
727 * Return NULL if the transformation cannot be done safely without
730 * @param rc the IV node that should be translated
731 * @param e the LFTR edge
732 * @param env the IV environment
734 * @return the translated region constant or NULL
735 * if the translation was not possible
738 * In the current implementation only the last edge is stored, so
739 * only one chain exists. That's why we might miss some opportunities.
741 static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
742 if (env->flags & osr_flag_lftr_with_ov_check) {
743 tarval *tv_l, *tv_r, *tv;
744 tarval_int_overflow_mode_t ovmode;
746 /* overflow can only be decided for Consts */
747 if (! is_Const(e->rc)) {
748 DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
752 tv_l = get_Const_tarval(rc);
753 tv_r = get_Const_tarval(e->rc);
755 ovmode = tarval_get_integer_overflow_mode();
756 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
760 tv = tarval_mul(tv_l, tv_r);
761 DB((dbg, LEVEL_4, " * %+F", tv_r));
764 tv = tarval_add(tv_l, tv_r);
765 DB((dbg, LEVEL_4, " + %+F", tv_r));
768 tv = tarval_sub(tv_l, tv_r);
769 DB((dbg, LEVEL_4, " - %+F", tv_r));
775 tarval_set_integer_overflow_mode(ovmode);
777 if (tv == tarval_bad) {
778 DB((dbg, LEVEL_4, " = OVERFLOW"));
781 return new_r_Const(current_ir_graph, get_irn_n(rc, -1), get_tarval_mode(tv), tv);
783 return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(rc));
787 * Applies the operations represented by the LFTR edges to a
788 * region constant and returns the value.
789 * Return NULL if the transformation cannot be done safely without
792 * @param iv the IV node that starts the LFTR edge chain
793 * @param rc the region constant that should be translated
794 * @param env the IV environment
796 * @return the translated region constant or NULL
797 * if the translation was not possible
799 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
802 if (env->flags & osr_flag_lftr_with_ov_check) {
803 /* overflow can only be decided for Consts */
804 if (! is_Const(rc)) {
805 DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
808 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
811 for (irn = iv; rc;) {
812 LFTR_edge *e = LFTR_find(irn, env);
814 rc = applyOneEdge(rc, e, env);
820 DB((dbg, LEVEL_3, "\n"));
825 * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
826 * and tries to optimize them.
828 static void do_lftr(ir_node *cmp, void *ctx) {
830 ir_node *left, *right, *liv, *riv;
832 ir_node *nleft = NULL, *nright = NULL;
834 if (get_irn_op(cmp) != op_Cmp)
837 left = get_Cmp_left(cmp);
838 right = get_Cmp_right(cmp);
840 liv = is_iv(left, env);
841 riv = is_iv(right, env);
842 if (liv && is_rc(right, liv)) {
843 iv = left; rc = right;
845 nright = applyEdges(iv, rc, env);
847 nleft = followEdges(iv, env);
850 else if (riv && is_rc(left, riv)) {
851 iv = right; rc = left;
853 nleft = applyEdges(iv, rc, env);
855 nright = followEdges(iv, env);
859 if (nleft && nright) {
860 DB((dbg, LEVEL_2, " LFTR for %+F\n", cmp));
861 set_Cmp_left(cmp, nleft);
862 set_Cmp_right(cmp, nright);
863 ++env->lftr_replaced;
868 * do linear function test replacement.
870 * @param irg the graph that should be optimized
871 * @param env the IV environment
873 static void lftr(ir_graph *irg, iv_env *env) {
874 irg_walk_graph(irg, NULL, do_lftr, env);
878 * Pre-walker: set all node links to NULL and fix the
879 * block of Proj nodes.
881 static void clear_and_fix(ir_node *irn, void *env)
883 set_irn_link(irn, NULL);
886 ir_node *pred = get_Proj_pred(irn);
887 set_irn_n(irn, -1, get_irn_n(pred, -1));
891 /* Performs Operator Strength Reduction for the passed graph. */
892 void opt_osr(ir_graph *irg, unsigned flags) {
895 if (! get_opt_strength_red())
898 FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
899 // firm_dbg_set_mask(dbg, SET_LEVEL_3);
901 DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
903 obstack_init(&env.obst);
904 env.stack = NEW_ARR_F(ir_node *, 128);
908 env.quad_map = new_set(quad_cmp, 64);
909 env.lftr_edges = new_set(LFTR_cmp, 64);
911 env.lftr_replaced = 0;
914 /* Clear all links and move Proj nodes into the
915 the same block as it's predecessors.
916 This can improve the placement of new nodes.
918 irg_walk_graph(irg, NULL, clear_and_fix, NULL);
920 /* we need dominance */
922 assure_irg_outs(irg);
924 /* calculate the post order number for blocks. */
925 irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
927 /* calculate the SCC's and drive OSR. */
931 /* try linear function test replacements */
934 set_irg_outs_inconsistent(irg);
935 set_irg_loopinfo_inconsistent(irg);
937 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
940 del_set(env.lftr_edges);
941 del_set(env.quad_map);
942 DEL_ARR_F(env.stack);
943 obstack_free(&env.obst, NULL);