2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
40 #include "irnodemap.h"
41 #include "irnodeset.h"
43 #include "iropt_dbg.h"
46 #include "irgraph_t.h"
50 /** Additional info we need for every block. */
51 typedef struct block_info {
52 ir_valueset_t *exp_gen; /**< The set of expression per block. */
53 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
54 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
55 ir_valueset_t *new_set; /**< The set of all new values for a block. */
56 ir_node *avail; /**< The get_map(avail, block) result. */
57 int not_found; /**< Non-zero, if avail was not found in this block. */
58 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
62 * A pair of nodes that must be exchanged.
63 * We must defer the exchange because our hash-sets cannot
64 * find an already replace node else.
66 typedef struct elim_pair {
67 ir_node *old_node; /**< The old node that will be replaced. */
68 ir_node *new_node; /**< The new node. */
69 struct elim_pair *next; /**< Links all entries in a list. */
70 int reason; /**< The reason for the replacement. */
73 /** The environment for the GVN-PRE algorithm */
74 typedef struct pre_env {
75 struct obstack *obst; /**< The obstack to allocate on. */
76 ir_node *start_block; /**< The start block of the current graph. */
77 ir_node *end_block; /**< The end block of the current graph */
78 block_info *list; /**< Links all block info entires for easier recovery. */
79 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
80 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
81 char first_iter; /**< non-zero for first iteration */
84 static pset *value_table;
85 static ir_nodemap_t value_map;
87 /** The debug module handle. */
88 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
90 /* ---------- Functions for Value sets ---------- */
92 /** computes dst = dst \/ src for value sets */
93 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
95 ir_valueset_iterator_t iter;
96 ir_node *value, *expr;
98 foreach_valueset(src, value, expr, iter) {
99 ir_valueset_insert(dst, value, expr);
104 /* ---------- Functions for Values ---------- */
107 * Add a node e representing the value v to the set.
109 * @param e a node representing an expression
110 * @param v a node representing a value
112 * @return the final value for the expression e
114 static ir_node *add(ir_node *e, ir_node *v)
117 ir_node *pred = get_Proj_pred(v);
118 ir_node *v_pred = identify_remember(value_table, pred);
120 if (v_pred != pred) {
121 /* must create a new value here */
122 v = new_r_Proj(current_ir_graph, get_nodes_block(v_pred), v_pred, get_irn_mode(v), get_Proj_proj(v));
125 v = identify_remember(value_table, v);
126 ir_nodemap_insert(&value_map, e, v);
131 * Lookup a value in a value set.
133 * @param e a node representing an expression
135 * @return a node representing the value or NULL if
136 * the given expression is not available
138 static ir_node *lookup(ir_node *e)
140 ir_node *value = ir_nodemap_get(&value_map, e);
142 return identify_remember(value_table, value);
147 * Return the block info of a block.
149 * @param block the block
151 static block_info *get_block_info(ir_node *block) {
152 return get_irn_link(block);
153 } /* get_block_info */
156 * Allocate a block info for a block.
158 * @param block the block
159 * @param env the environment
161 static void alloc_blk_info(ir_node *block, pre_env *env) {
162 block_info *info = obstack_alloc(env->obst, sizeof(*info));
164 set_irn_link(block, info);
165 info->exp_gen = ir_valueset_new(16);
166 info->avail_out = ir_valueset_new(16);
167 info->antic_in = ir_valueset_new(16);
168 info->new_set = NULL;
171 info->next = env->list;
173 } /* alloc_blk_info */
176 * Returns non-zero if a node is movable and a possible candidate for PRE.
180 static int is_nice_value(ir_node *n) {
184 n = get_Proj_pred(n);
185 if (get_irn_pinned(n) == op_pin_state_pinned)
187 mode = get_irn_mode(n);
188 if (!mode_is_data(mode)) {
189 if (! is_Div(n) && ! is_Mod(n) && ! is_DivMod(n))
191 if (! is_NoMem(get_fragile_op_mem(n)))
195 } /* is_nice_value */
201 * @param set the set to dump
202 * @param txt a text to describe the set
203 * @param block the owner block of the set
205 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block) {
206 ir_valueset_iterator_t iter;
207 ir_node *value, *expr;
210 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
212 foreach_valueset(set, value, expr, iter) {
214 DB((dbg, LEVEL_2, "\n"));
216 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
218 DB((dbg, LEVEL_2, " %+F,", expr));
221 DB((dbg, LEVEL_2, "\n}\n"));
222 } /* dump_value_set */
225 #define dump_value_set(set, txt, block)
226 #endif /* DEBUG_libfirm */
229 * Topological walker. Allocates block info for every block and place nodes in topological
230 * order into the nodes set.
232 static void topo_walker(ir_node *irn, void *ctx) {
239 /* the topological walker ensures that blocks are visited before anything else */
240 alloc_blk_info(irn, env);
243 /* GVN step: remember the value */
244 value = add(irn, irn);
246 /* no need to put constants into the sets: they are always redundant */
247 if (! is_nice_value(irn) || is_irn_constlike(irn))
250 /* Do not put mode_T nodes info the sets, or PhiT will be created
251 (which are not allowed in Firm). Instead, put the Proj's here only. */
252 if (get_irn_mode(irn) == mode_T)
255 /* place this node into the set of possible nodes of its block */
256 block = get_nodes_block(irn);
257 info = get_block_info(block);
259 ir_valueset_insert(info->exp_gen, value, irn);
263 * Computes Avail_out(block):
265 * Avail_in(block) = Avail_out(dom(block))
266 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
269 * This function must be called in the top-down dominance order:
270 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
272 * @param block the block
273 * @param ctx walker context
275 static void compute_avail_top_down(ir_node *block, void *ctx)
278 block_info *dom_info;
279 block_info *info = get_block_info(block);
282 /* we don't need the end block Avail */
283 if (block == env->end_block)
287 * First add all nodes from the dominator.
288 * This must be done to ensure that Antic_out contains the leader
289 * for every node. The root has no dominator.
291 if (block != env->start_block) {
292 dom_blk = get_Block_idom(block);
293 assert(is_Block(dom_blk));
295 dom_info = get_block_info(dom_blk);
298 value_union(info->avail_out, dom_info->avail_out);
300 value_union(info->avail_out, info->exp_gen);
302 dump_value_set(info->avail_out, "Avail_out", block);
303 } /* compute_avail_top_down */
306 * check if a node n is clean in block block.
309 * @param block the block
311 static int _is_clean(ir_node *n, ir_node *block)
315 if (get_nodes_block(n) != block)
323 if (! is_nice_value(n))
325 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
326 ir_node *pred = get_irn_n(n, i);
327 if (! _is_clean(pred, block))
337 * check if a node n is clean.
339 * @param n the node to check
341 static int is_clean(ir_node *n) {
342 int res = _is_clean(n, get_nodes_block(n));
347 * Implements phi_translate. Translate a expression above a Phi.
349 * @param node the node
350 * @param block the block in which the node is translated
351 * @param pos the input number of the destination block
352 * @param env the environment
354 * @return a node representing the translated value
356 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, pre_env *env)
363 if (get_nodes_block(node) == block) {
364 /* a Phi inside our block */
365 return get_Phi_pred(node, pos);
367 /* already outside */
371 arity = get_irn_intra_arity(node);
373 /* check if the node has at least one Phi predecessor */
374 for (i = 0; i < arity; ++i) {
375 ir_node *pred = get_irn_intra_n(node, i);
376 ir_node *leader = lookup(pred);
378 leader = leader != NULL ? leader : pred;
379 if (is_Phi(leader) && get_nodes_block(pred) == block)
383 /* no Phi in the predecessors */
387 /* Create a copy of the node in the pos'th predecessor block.
388 Use our environmental obstack, as these nodes are always
390 old = current_ir_graph->obst;
391 current_ir_graph->obst = env->obst;
393 get_irn_dbg_info(node),
400 /* We need the attribute copy here, because the Hash value of a
401 node might depend on that. */
402 copy_node_attr(node, nn);
404 set_nodes_block(nn, get_nodes_block(node));
405 for (i = 0; i < arity; ++i) {
406 ir_node *pred = get_irn_intra_n(node, i);
407 ir_node *leader = lookup(pred);
409 leader = leader != NULL ? leader : pred;
410 if (is_Phi(leader) && get_irn_intra_n(pred, -1) == block)
411 set_irn_n(nn, i, get_Phi_pred(leader, pos));
413 set_irn_n(nn, i, leader);
415 current_ir_graph->obst = old;
417 } /* phi_translate */
420 * Block-walker, computes Antic_in(block).
422 * @param block the block
423 * @param ctx the walker environment
425 static void compute_antic(ir_node *block, void *ctx) {
427 block_info *succ_info;
428 block_info *info = get_block_info(block);
429 ir_node *succ, *value, *expr;
431 ir_valueset_iterator_t iter;
433 /* no need for computations in start block */
434 if (block == env->start_block)
437 size = ir_valueset_size(info->antic_in);
439 /* the end block has no successor */
440 if (block != env->end_block) {
444 * This step puts all generated expression from the current
445 * current block into Antic_in.
446 * It is enough to do this in the first iteration only, because
447 * the set info->exp_gen is not changed anymore.
449 if (env->first_iter) {
450 foreach_valueset(info->exp_gen, value, expr, iter) {
451 ir_valueset_insert(info->antic_in, value, expr);
455 n_succ = get_Block_n_cfg_outs(block);
459 /* find blocks position in succ's block predecessors */
460 succ = get_Block_cfg_out(block, 0);
461 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
462 if (get_Block_cfgpred_block(succ, i) == block) {
469 succ_info = get_block_info(succ);
470 /* translate into list: we cannot insert into a set we iterate
471 * and succ might be equal to block for endless loops */
472 foreach_valueset(succ_info->antic_in, value, expr, iter) {
473 ir_node *trans = phi_translate(expr, succ, pos, env);
476 ir_valueset_insert(info->antic_in, value, trans);
478 } else { /* n_succ > 1 */
480 block_info *succ0_info;
485 /* Select a successor to compute the disjoint of all Nodes
486 sets, it might be useful to select the block with the
487 smallest number of nodes. For simplicity we choose the
489 succ0 = get_Block_cfg_out(block, 0);
490 succ0_info = get_block_info(succ0);
491 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
492 /* we need the disjoint */
493 for (i = 1; i < n_succ; ++i) {
494 ir_node *succ = get_Block_cfg_out(block, i);
495 block_info *succ_info = get_block_info(succ);
496 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
500 /* we found a value that is common in all Antic_in(succ(b)),
501 put it in Antic_in(b) if the value is NOT already represented. */
502 ir_valueset_insert(info->antic_in, value, expr);
508 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
509 * and all other sets */
511 dump_value_set(info->antic_in, "Antic_in", block);
512 if (size != ir_valueset_size(info->antic_in)) {
513 /* the Antic_in set has changed */
516 } /* compute_antic */
519 * Perform insertion of partially redundant values.
520 * For every Block node, do the following:
521 * 1. Propagate the NEW_SETS of the dominator into the current block.
522 * If the block has multiple predecessors,
523 * 2a. Iterate over the ANTIC expressions for the block to see if
524 * any of them are partially redundant.
525 * 2b. If so, insert them into the necessary predecessors to make
526 * the expression fully redundant.
527 * 2c. Insert a new Phi merging the values of the predecessors.
528 * 2d. Insert the new Phi, and the new expressions, into the
531 * @param block the block
532 * @param ctx the walker environment
534 static void insert_nodes(ir_node *block, void *ctx)
537 ir_node *value, *expr, *idom, *first_s, *worklist;
538 block_info *curr_info, *idom_info;
539 int pos, arity = get_irn_intra_arity(block);
540 int all_same, by_some, updated;
541 ir_valueset_iterator_t iter;
543 /* ensure that even the start block has a new_set */
544 curr_info = get_block_info(block);
545 if (curr_info->new_set)
546 ir_valueset_del(curr_info->new_set);
547 curr_info->new_set = ir_valueset_new(16);
549 if (block == env->start_block)
552 idom = get_Block_idom(block);
553 idom_info = get_block_info(idom);
555 /* update the new_sets */
557 dump_value_set(idom_info->new_set, "[New Set]", idom);
558 foreach_valueset(idom_info->new_set, value, expr, iter) {
559 ir_valueset_insert(curr_info->new_set, value, expr);
560 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
563 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
569 /* convert the set into a list. This allows the removal of
570 * elements from the set */
572 foreach_valueset(curr_info->antic_in, value, expr, iter) {
575 /* If the value was already computed in the dominator, then
576 it is totally redundant. Hence we have nothing to insert. */
577 if (ir_valueset_lookup(idom_info->avail_out, value)) {
578 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
587 /* for all predecessor blocks */
588 for (pos = 0; pos < arity; ++pos) {
589 block_info *pred_info;
590 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
591 ir_node *e_prime, *v_prime, *e_dprime;
593 /* ignore bad blocks. */
594 if (is_Bad(pred_blk))
597 e_prime = phi_translate(expr, block, pos, env);
598 v_prime = lookup(e_prime);
602 pred_info = get_block_info(pred_blk);
603 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
605 if (e_dprime == NULL) {
606 pred_info->avail = e_prime;
607 pred_info->not_found = 1;
610 pred_info->avail = e_dprime;
611 pred_info->not_found = 0;
612 mode = get_irn_mode(e_dprime);
617 else if (first_s != e_dprime)
620 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
624 /* If it's not the same value already existing along every predecessor, and
625 it's defined by some predecessor, it is partially redundant. */
626 if (! all_same && by_some) {
629 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
631 in = xmalloc(arity * sizeof(*in));
632 /* for all predecessor blocks */
633 for (pos = 0; pos < arity; ++pos) {
634 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
635 block_info *pred_info = get_block_info(pred_blk);
637 /* ignore bad blocks. */
638 if (is_Bad(pred_blk)) {
643 /* ignore blocks that already have the expression */
644 if (pred_info->not_found) {
645 ir_node *e_prime = pred_info->avail;
647 if (!is_Phi(e_prime)) {
648 ir_node *proj_pred = NULL;
649 if (is_Proj(e_prime)) {
650 ir_node *pred = get_Proj_pred(e_prime);
651 mode = get_irn_mode(pred);
653 get_irn_dbg_info(pred),
654 current_ir_graph, pred_blk,
658 get_irn_in(pred) + 1);
659 copy_node_attr(pred, nn);
661 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
664 mode = get_irn_mode(e_prime);
666 get_irn_dbg_info(e_prime),
667 current_ir_graph, pred_blk,
670 get_irn_arity(e_prime),
671 get_irn_in(e_prime) + 1);
672 copy_node_attr(e_prime, nn);
673 if (proj_pred != NULL) {
674 set_Proj_pred(nn, proj_pred);
677 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
678 ir_valueset_insert(pred_info->avail_out, add(nn, lookup(expr)), nn);
679 pred_info->avail = nn;
682 in[pos] = pred_info->avail;
684 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
685 value = add(phi, lookup(expr));
686 ir_valueset_replace(curr_info->avail_out, value, phi);
687 ir_valueset_insert(curr_info->new_set, value, phi);
689 DB((dbg, LEVEL_2, "New %+F for redundant %+F created\n", phi, expr));
690 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
693 } /* node_set_foreach */
697 * Walker, change nodes by its value if different.
699 * We cannot do the changes right here, as this would change
700 * the hash values of the nodes in the avail_out set!
702 * @param irn the node
703 * @param ctx the walker environment
705 static void eliminate(ir_node *irn, void *ctx) {
708 if (is_no_Block(irn)) {
709 ir_node *block = get_nodes_block(irn);
710 block_info *bl = get_block_info(block);
711 ir_node *value = lookup(irn);
714 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
716 if (expr != NULL && expr != irn) {
717 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
721 p->next = env->pairs;
722 p->reason = FS_OPT_GVN_FULLY;
730 * Do all the recorded changes and optimize
731 * newly created Phi's.
733 * @param pairs list of elimination pairs
735 static void eliminate_nodes(elim_pair *pairs) {
738 for (p = pairs; p != NULL; p = p->next) {
739 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
741 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
742 * which we can optimize here
744 if (is_Phi(p->new_node)) {
748 for (i = get_irn_intra_arity(p->new_node) - 1; i >= 0; --i) {
749 ir_node *pred = get_irn_n(p->new_node, i);
751 if (pred != p->old_node) {
762 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
763 exchange(p->old_node, p->new_node);
765 } /* eliminate_nodes */
768 * Argh: Endless loops cause problems, because the
769 * insert algorithm did not terminate. We get translated nodes that
770 * references the origin. These nodes are translated again and again...
772 * The current fix is to use post-dominance. This simple ignores
773 * endless loops, ie we cannot optimize them.
775 void do_gvn_pre(ir_graph *irg)
779 optimization_state_t state;
781 unsigned antic_iter, insert_iter;
782 ir_node *value, *expr;
784 /* register a debug mask */
785 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
787 /* edges will crash if enabled due to our allocate on other obstack trick */
788 edges_deactivate(irg);
790 value_table = new_identities();
791 ir_nodemap_init(&value_map);
796 a_env.start_block = get_irg_start_block(irg);
797 a_env.end_block = get_irg_end_block(irg);
800 /* Move Proj's into the same block as their args,
801 else we would assign the result to wrong blocks */
802 normalize_proj_nodes(irg);
804 /* critical edges MUST be removed */
805 remove_critical_cf_edges(irg);
807 /* we need dominator for Antic_out calculation */
809 assure_postdoms(irg);
810 /* we get all nodes of a block by following outs */
811 assure_irg_outs(irg);
814 * Switch on GCSE. We need it to correctly compute
815 * the leader of a node by hashing.
817 save_optimization_state(&state);
818 set_opt_global_cse(1);
820 DB((dbg, LEVEL_1, "Doing GVN-PRE for %e\n", get_irg_entity(irg)));
822 /* allocate block info for all blocks */
823 irg_walk_blkwise_graph(irg, NULL, topo_walker, &a_env);
825 /* clean the exp_gen set. Doing this here saves the cleanup in the iteration. */
826 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
827 ir_valueset_iterator_t iter;
829 foreach_valueset(bl_info->exp_gen, value, expr, iter) {
831 ir_valueset_remove_iterator(bl_info->exp_gen, &iter);
835 /* compute the available value sets for all blocks */
836 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
838 /* compute the anticipated value sets for all blocks */
840 a_env.first_iter = 1;
842 /* we use the visited flag to mark non-clean nodes */
843 inc_irg_visited(irg);
845 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
847 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
848 a_env.first_iter = 0;
849 DB((dbg, LEVEL_1, "------------------------\n"));
850 } while (a_env.changes != 0);
852 /* compute redundant expressions */
855 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
857 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
858 DB((dbg, LEVEL_1, "------------------------\n"));
859 } while (a_env.changes != 0);
861 /* last step: eliminate nodes */
862 irg_walk_graph(irg, NULL, eliminate, &a_env);
863 eliminate_nodes(a_env.pairs);
865 /* clean up: delete all sets */
866 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
867 ir_valueset_del(bl_info->exp_gen);
868 ir_valueset_del(bl_info->avail_out);
869 ir_valueset_del(bl_info->antic_in);
870 if (bl_info->new_set)
871 ir_valueset_del(bl_info->new_set);
873 del_identities(value_table);
874 ir_nodemap_destroy(&value_map);
875 obstack_free(&obst, NULL);
878 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
879 set_irg_pinned(irg, op_pin_state_pinned);
880 restore_optimization_state(&state);
883 set_irg_outs_inconsistent(irg);
884 set_irg_loopinfo_inconsistent(irg);