3 * File name: ir/opt/opt_osr.
4 * Purpose: Operator Strength Reduction,
5 * Keith D. Cooper, L. Taylor Simpson, Christopher A. Vick
10 * Copyright: (c) 2006 Universität Karlsruhe
11 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
43 /** The debug handle. */
44 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
48 ir_node *head; /**< the head of the list */
52 typedef struct node_entry {
53 unsigned DFSnum; /**< the DFS number of this node */
54 unsigned low; /**< the low number of this node */
55 ir_node *header; /**< the header of this node */
56 int in_stack; /**< flag, set if the node is on the stack */
57 ir_node *next; /**< link to the next node the the same scc */
58 scc *pscc; /**< the scc of this node */
59 unsigned POnum; /**< the post order number for blocks */
62 /** The environment. */
63 typedef struct iv_env {
64 struct obstack obst; /**< an obstack for allocations */
65 ir_node **stack; /**< the node stack */
66 int tos; /**< tos index */
67 unsigned nextDFSnum; /**< the current DFS number */
68 unsigned POnum; /**< current post order number */
69 set *quad_map; /**< a map from (op, iv, rc) to node */
70 set *lftr_edges; /**< the set of lftr edges */
71 unsigned replaced; /**< number of replaced ops */
72 unsigned lftr_replaced; /**< number of applied linear function test replacements */
73 unsigned flags; /**< additional flags */
77 * An entry in the (op, node, node) -> node map.
79 typedef struct quad_t {
80 opcode code; /**< the opcode of the reduced operation */
81 ir_node *op1; /**< the first operand the reduced operation */
82 ir_node *op2; /**< the second operand of the reduced operation */
84 ir_node *res; /**< the reduced operation */
90 typedef struct LFTR_edge {
91 ir_node *src; /**< the source node */
92 ir_node *dst; /**< the destination node */
93 opcode code; /**< the opcode that must be applied */
94 ir_node *rc; /**< the region const that must be applied */
98 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env);
101 * Compare two LFTR edges.
103 static int LFTR_cmp(const void *e1, const void *e2, size_t size) {
104 const LFTR_edge *l1 = e1;
105 const LFTR_edge *l2 = e2;
107 return l1->src != l2->src;
113 static LFTR_edge *LFTR_find(ir_node *src, iv_env *env) {
118 return set_find(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
124 static void LFTR_add(ir_node *src, ir_node *dst, opcode code, ir_node *rc, iv_env *env) {
133 * There might be more than one edge here. This is rather bad
134 * because we currently store only one.
136 // assert(LFTR_find(src, env) == NULL);
137 set_insert(env->lftr_edges, &key, sizeof(key), HASH_PTR(src));
141 * Gets the node_entry of a node
143 static node_entry *get_irn_ne(ir_node *irn, iv_env *env) {
144 node_entry *e = get_irn_link(irn);
147 e = obstack_alloc(&env->obst, sizeof(*e));
148 memset(e, 0, sizeof(*e));
149 set_irn_link(irn, e);
155 * Check if irn is an IV.
157 * @param irn the node to check
158 * @param env the environment
160 * @returns the header if it is one, NULL else
162 static ir_node *is_iv(ir_node *irn, iv_env *env) {
163 return get_irn_ne(irn, env)->header;
167 * Check if irn is a region constant.
168 * The block or irn must strictly dominate the header block.
170 * @param irn the node to check
171 * @param header_block the header block of the induction variable
173 static int is_rc(ir_node *irn, ir_node *header_block) {
174 ir_node *block = get_nodes_block(irn);
176 return (block != header_block) && block_dominates(block, header_block);
180 * Set compare function for the quad set.
182 static int quad_cmp(const void *e1, const void *e2, size_t size) {
183 const quad_t *c1 = e1;
184 const quad_t *c2 = e2;
186 return c1->code != c2->code || c1->op1 != c2->op1 || c1->op2 != c2->op2;
190 * Check if an reduced operation was already calculated.
192 * @param code the opcode of the operation
193 * @param op1 the first operand of the operation
194 * @param op2 the second operand of the operation
195 * @param env the environment
197 * @return the already reduced node or NULL if this operation is not yet reduced
199 static ir_node *search(opcode code, ir_node *op1, ir_node *op2, iv_env *env) {
206 entry = set_find(env->quad_map, &key, sizeof(key),
207 (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
214 * Add an reduced operation.
216 * @param code the opcode of the operation
217 * @param op1 the first operand of the operation
218 * @param op2 the second operand of the operation
219 * @param result the result of the reduced operation
220 * @param env the environment
222 static void add(opcode code, ir_node *op1, ir_node *op2, ir_node *result, iv_env *env) {
230 set_insert(env->quad_map, &key, sizeof(key),
231 (code * 9) ^ HASH_PTR(op1) ^HASH_PTR(op2));
235 * Find a location where to place a bin-op whose operands are in
238 * @param block1 the block of the first operand
239 * @param block2 the block of the second operand
241 * Note that we know here that such a place must exists. Moreover, this means
242 * that either block1 dominates block2 or vice versa. So, just return
245 static ir_node *find_location(ir_node *block1, ir_node *block2) {
246 if (block_dominates(block1, block2))
248 assert(block_dominates(block2, block1));
253 * Create a node that executes an op1 code op1 operation.
255 * @param code the opcode to execute
256 * @param db debug info to add to the new node
257 * @param op1 the first operand
258 * @param op2 the second operand
259 * @param mode the mode of the new operation
261 * @return the newly created node
263 static ir_node *do_apply(opcode code, dbg_info *db, ir_node *op1, ir_node *op2, ir_mode *mode) {
264 ir_graph *irg = current_ir_graph;
266 ir_node *block = find_location(get_nodes_block(op1), get_nodes_block(op2));
270 result = new_rd_Mul(db, irg, block, op1, op2, mode);
273 result = new_rd_Add(db, irg, block, op1, op2, mode);
276 result = new_rd_Sub(db, irg, block, op1, op2, mode);
286 * The Apply operation.
288 * @param orig the node that represent the original operation and determines
289 * the opcode, debug-info and mode of a newly created one
290 * @param op1 the first operand
291 * @param op2 the second operand
292 * @param env the environment
294 * @return the newly created node
296 static ir_node *apply(ir_node *orig, ir_node *op1, ir_node *op2, iv_env *env) {
297 opcode code = get_irn_opcode(orig);
298 ir_node *result = search(code, op1, op2, env);
301 dbg_info *db = get_irn_dbg_info(orig);
302 ir_node *op1_header = get_irn_ne(op1, env)->header;
303 ir_node *op2_header = get_irn_ne(op2, env)->header;
305 if (op1_header != NULL && is_rc(op2, op1_header)) {
306 result = reduce(orig, op1, op2, env);
308 else if (op2_header != NULL && is_rc(op1, op2_header)) {
309 result = reduce(orig, op2, op1, env);
312 result = do_apply(code, db, op1, op2, get_irn_mode(orig));
313 get_irn_ne(result, env)->header = NULL;
320 * The Reduce operation.
322 * @param orig the node that represent the original operation and determines
323 * the opcode, debug-info and mode of a newly created one
324 * @param iv the induction variable
325 * @param rc the region constant
326 * @param env the environment
328 * @return the reduced node
330 static ir_node *reduce(ir_node *orig, ir_node *iv, ir_node *rc, iv_env *env) {
331 opcode code = get_irn_opcode(orig);
332 ir_node *result = search(code, iv, rc, env);
335 node_entry *e, *iv_e;
337 ir_mode *mode = get_irn_mode(orig);
339 result = exact_copy(iv);
340 if (mode_is_reference(mode)) {
341 /* bad case: we replace a reference mode calculation.
342 assure that the new IV will be a reference one */
343 set_irn_mode(result, mode);
345 add(code, iv, rc, result, env);
346 DB((dbg, LEVEL_3, " Created new %+F for %+F (%s %+F)\n", result, iv,
347 get_irn_opname(orig), rc));
349 iv_e = get_irn_ne(iv, env);
350 e = get_irn_ne(result, env);
351 e->header = iv_e->header;
353 /* create the LFTR edge */
354 LFTR_add(iv, result, code, rc, env);
356 n = get_irn_arity(result);
357 for (i = 0; i < n; ++i) {
358 ir_node *o = get_irn_n(result, i);
360 e = get_irn_ne(o, env);
361 if (e->header == iv_e->header)
362 o = reduce(orig, o, rc, env);
363 else if (is_Phi(result))
364 o = apply(orig, o, rc, env);
367 o = apply(orig, o, rc, env);
369 set_irn_n(result, i, o);
373 DB((dbg, LEVEL_3, " Already Created %+F for %+F (%s %+F)\n", result, iv,
374 get_irn_opname(orig), rc));
380 * The Replace operation.
382 * @param irn the node that will be replaced
383 * @param iv the induction variable
384 * @param rc the region constant
385 * @param env the environment
387 static void replace(ir_node *irn, ir_node *iv, ir_node *rc, iv_env *env) {
390 DB((dbg, LEVEL_2, " Replacing %+F\n", irn));
392 result = reduce(irn, iv, rc, env);
394 node_entry *e, *iv_e;
396 hook_strength_red(current_ir_graph, irn);
397 exchange(irn, result);
398 e = get_irn_ne(result, env);
399 iv_e = get_irn_ne(iv, env);
400 e->header = iv_e->header;
405 * Check if a node can be replaced (+, -, *).
407 * @param irn the node to check
408 * @param env the environment
410 * @return non-zero if irn should be Replace'd
412 static int check_replace(ir_node *irn, iv_env *env) {
413 ir_node *left, *right, *iv, *rc;
414 ir_op *op = get_irn_op(irn);
415 opcode code = get_op_code(op);
424 left = get_binop_left(irn);
425 right = get_binop_right(irn);
427 liv = is_iv(left, env);
428 riv = is_iv(right, env);
429 if (liv && is_rc(right, liv)) {
430 iv = left; rc = right;
432 else if (riv && is_op_commutative(op) &&
434 iv = right; rc = left;
438 replace(irn, iv, rc, env);
450 * Check which SCC's are induction variables.
453 * @param env the environment
455 static void classify_iv(scc *pscc, iv_env *env) {
456 ir_node *irn, *next, *header = NULL;
460 /* find the header block for this scc */
461 for (irn = pscc->head; irn; irn = next) {
462 node_entry *e = get_irn_link(irn);
463 ir_node *block = get_nodes_block(irn);
466 b = get_irn_ne(block, env);
469 if (h->POnum < b->POnum) {
480 /* check if this scc contains only Phi, Add or Sub nodes */
481 for (irn = pscc->head; irn; irn = next) {
482 node_entry *e = get_irn_ne(irn, env);
485 switch (get_irn_opcode(irn)) {
489 for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
490 ir_node *pred = get_irn_n(irn, j);
491 node_entry *pe = get_irn_ne(pred, env);
493 if (pe->pscc != e->pscc) {
494 /* not in the same SCC, must be a region const */
495 if (! is_rc(pred, header)) {
496 /* not an induction variable */
503 /* not an induction variable */
507 /* found an induction variable */
508 DB((dbg, LEVEL_2, " Found an induction variable:\n "));
510 /* set the header for every node in this scc */
511 for (irn = pscc->head; irn; irn = next) {
512 node_entry *e = get_irn_ne(irn, env);
515 DB((dbg, LEVEL_2, " %+F,", irn));
517 DB((dbg, LEVEL_2, "\n"));
521 for (irn = pscc->head; irn; irn = next) {
522 node_entry *e = get_irn_ne(irn, env);
525 if (! check_replace(irn, env))
533 * @param pscc the SCC
534 * @param env the environment
536 static void process_scc(scc *pscc, iv_env *env) {
537 ir_node *head = pscc->head;
538 node_entry *e = get_irn_link(head);
544 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
545 for (irn = pscc->head; irn; irn = next) {
546 node_entry *e = get_irn_link(irn);
550 DB((dbg, LEVEL_4, " %+F,", irn));
552 DB((dbg, LEVEL_4, "\n"));
556 if (e->next == NULL) {
557 /* this SCC has only a single member */
558 check_replace(head, env);
561 classify_iv(pscc, env);
566 * Push a node onto the stack.
568 * @param env the environment
569 * @param n the node to push
571 static void push(iv_env *env, ir_node *n) {
574 if (env->tos == ARR_LEN(env->stack)) {
575 int nlen = ARR_LEN(env->stack) * 2;
576 ARR_RESIZE(ir_node *, env->stack, nlen);
578 env->stack[env->tos++] = n;
579 e = get_irn_ne(n, env);
584 * pop a node from the stack
586 * @param env the environment
588 * @return The topmost node
590 static ir_node *pop(iv_env *env)
592 ir_node *n = env->stack[--env->tos];
593 node_entry *e = get_irn_ne(n, env);
600 * Do Tarjan's SCC algorithm and drive OSR.
602 * @param irn start at this node
603 * @param env the environment
605 static void dfs(ir_node *irn, iv_env *env)
608 node_entry *node = get_irn_ne(irn, env);
610 mark_irn_visited(irn);
612 /* do not put blocks into the scc */
614 n = get_irn_arity(irn);
615 for (i = 0; i < n; ++i) {
616 ir_node *pred = get_irn_n(irn, i);
618 if (irn_not_visited(pred))
623 ir_node *block = get_nodes_block(irn);
625 node->DFSnum = env->nextDFSnum++;
626 node->low = node->DFSnum;
629 /* handle the block */
630 if (irn_not_visited(block))
633 n = get_irn_arity(irn);
634 for (i = 0; i < n; ++i) {
635 ir_node *pred = get_irn_n(irn, i);
636 node_entry *o = get_irn_ne(pred, env);
638 if (irn_not_visited(pred)) {
640 node->low = MIN(node->low, o->low);
642 if (o->DFSnum < node->DFSnum && o->in_stack)
643 node->low = MIN(o->DFSnum, node->low);
645 if (node->low == node->DFSnum) {
646 scc *pscc = obstack_alloc(&env->obst, sizeof(*pscc));
654 e = get_irn_ne(x, env);
656 e->next = pscc->head;
660 process_scc(pscc, env);
666 * Do the DFS by starting at the End node of a graph.
668 * @param irg the graph to process
669 * @param env the environment
671 static void do_dfs(ir_graph *irg, iv_env *env) {
672 ir_graph *rem = current_ir_graph;
673 ir_node *end = get_irg_end(irg);
676 current_ir_graph = irg;
677 inc_irg_visited(irg);
679 /* visit all visible nodes */
682 /* visit the keep-alives */
683 n = get_End_n_keepalives(end);
684 for (i = 0; i < n; ++i) {
685 ir_node *ka = get_End_keepalive(end, i);
687 if (irn_not_visited(ka))
691 current_ir_graph = rem;
695 * Post-block-walker: assign the post-order number.
697 static void assign_po(ir_node *block, void *ctx) {
699 node_entry *e = get_irn_ne(block, env);
701 e->POnum = env->POnum++;
705 * Follows the LFTR edges and return the last node in the chain.
707 * @param irn the node that should be followed
708 * @param env the IV environment
711 * In the current implementation only the last edge is stored, so
712 * only one chain exists. That's why we might miss some opportunities.
714 static ir_node *followEdges(ir_node *irn, iv_env *env) {
716 LFTR_edge *e = LFTR_find(irn, env);
725 * Apply one LFTR edge operation.
726 * Return NULL if the transformation cannot be done safely without
729 * @param rc the IV node that should be translated
730 * @param e the LFTR edge
731 * @param env the IV environment
733 * @return the translated region constant or NULL
734 * if the translation was not possible
737 * In the current implementation only the last edge is stored, so
738 * only one chain exists. That's why we might miss some opportunities.
740 static ir_node *applyOneEdge(ir_node *rc, LFTR_edge *e, iv_env *env) {
741 if (env->flags & osr_flag_lftr_with_ov_check) {
742 tarval *tv_l, *tv_r, *tv;
743 tarval_int_overflow_mode_t ovmode;
745 /* overflow can only be decided for Consts */
746 if (! is_Const(e->rc)) {
747 DB((dbg, LEVEL_4, " = UNKNOWN (%+F)", e->rc));
751 tv_l = get_Const_tarval(rc);
752 tv_r = get_Const_tarval(e->rc);
754 ovmode = tarval_get_integer_overflow_mode();
755 tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
759 tv = tarval_mul(tv_l, tv_r);
760 DB((dbg, LEVEL_4, " * %+F", tv_r));
763 tv = tarval_add(tv_l, tv_r);
764 DB((dbg, LEVEL_4, " + %+F", tv_r));
767 tv = tarval_sub(tv_l, tv_r);
768 DB((dbg, LEVEL_4, " - %+F", tv_r));
774 tarval_set_integer_overflow_mode(ovmode);
776 if (tv == tarval_bad) {
777 DB((dbg, LEVEL_4, " = OVERFLOW"));
780 return new_r_Const(current_ir_graph, get_irn_n(rc, -1), get_tarval_mode(tv), tv);
782 return do_apply(e->code, NULL, rc, e->rc, get_irn_mode(rc));
786 * Applies the operations represented by the LFTR edges to a
787 * region constant and returns the value.
788 * Return NULL if the transformation cannot be done safely without
791 * @param iv the IV node that starts the LFTR edge chain
792 * @param rc the region constant that should be translated
793 * @param env the IV environment
795 * @return the translated region constant or NULL
796 * if the translation was not possible
798 static ir_node *applyEdges(ir_node *iv, ir_node *rc, iv_env *env) {
801 if (env->flags & osr_flag_lftr_with_ov_check) {
802 /* overflow can only be decided for Consts */
803 if (! is_Const(rc)) {
804 DB((dbg, LEVEL_4, " = UNKNOWN (%+F)\n", rc));
807 DB((dbg, LEVEL_4, "%+F", get_Const_tarval(rc)));
810 for (irn = iv; rc;) {
811 LFTR_edge *e = LFTR_find(irn, env);
813 rc = applyOneEdge(rc, e, env);
819 DB((dbg, LEVEL_3, "\n"));
824 * Walker, finds Cmp(iv, rc) or Cmp(rc, iv)
825 * and tries to optimize them.
827 static void do_lftr(ir_node *cmp, void *ctx) {
829 ir_node *left, *right, *liv, *riv;
831 ir_node *nleft = NULL, *nright = NULL;
833 if (get_irn_op(cmp) != op_Cmp)
836 left = get_Cmp_left(cmp);
837 right = get_Cmp_right(cmp);
839 liv = is_iv(left, env);
840 riv = is_iv(right, env);
841 if (liv && is_rc(right, liv)) {
842 iv = left; rc = right;
844 nright = applyEdges(iv, rc, env);
846 nleft = followEdges(iv, env);
849 else if (riv && is_rc(left, riv)) {
850 iv = right; rc = left;
852 nleft = applyEdges(iv, rc, env);
854 nright = followEdges(iv, env);
858 if (nleft && nright) {
859 DB((dbg, LEVEL_2, " LFTR for %+F\n", cmp));
860 set_Cmp_left(cmp, nleft);
861 set_Cmp_right(cmp, nright);
862 ++env->lftr_replaced;
867 * do linear function test replacement.
869 * @param irg the graph that should be optimized
870 * @param env the IV environment
872 static void lftr(ir_graph *irg, iv_env *env) {
873 irg_walk_graph(irg, NULL, do_lftr, env);
877 * Pre-walker: set all node links to NULL and fix the
878 * block of Proj nodes.
880 static void clear_and_fix(ir_node *irn, void *env)
882 set_irn_link(irn, NULL);
885 ir_node *pred = get_Proj_pred(irn);
886 set_irn_n(irn, -1, get_irn_n(pred, -1));
890 /* Performs Operator Strength Reduction for the passed graph. */
891 void opt_osr(ir_graph *irg, unsigned flags) {
894 if (! get_opt_strength_red())
897 FIRM_DBG_REGISTER(dbg, "firm.opt.osr");
898 // firm_dbg_set_mask(dbg, SET_LEVEL_3);
900 DB((dbg, LEVEL_1, "Doing Operator Strength Reduction for %+F\n", irg));
902 obstack_init(&env.obst);
903 env.stack = NEW_ARR_F(ir_node *, 128);
907 env.quad_map = new_set(quad_cmp, 64);
908 env.lftr_edges = new_set(LFTR_cmp, 64);
910 env.lftr_replaced = 0;
913 /* Clear all links and move Proj nodes into the
914 the same block as it's predecessors.
915 This can improve the placement of new nodes.
917 irg_walk_graph(irg, NULL, clear_and_fix, NULL);
919 /* we need dominance */
921 assure_irg_outs(irg);
923 /* calculate the post order number for blocks. */
924 irg_out_block_walk(get_irg_start_block(irg), NULL, assign_po, &env);
926 /* calculate the SCC's and drive OSR. */
930 /* try linear function test replacements */
933 set_irg_outs_inconsistent(irg);
934 set_irg_loopinfo_inconsistent(irg);
936 DB((dbg, LEVEL_1, "Replacements: %u + %u (lftr)\n\n", env.replaced, env.lftr_replaced));
939 del_set(env.lftr_edges);
940 del_set(env.quad_map);
941 DEL_ARR_F(env.stack);
942 obstack_free(&env.obst, NULL);