2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
40 #include "irnodemap.h"
41 #include "irnodeset.h"
43 #include "iropt_dbg.h"
46 #include "irgraph_t.h"
50 /** Additional info we need for every block. */
51 typedef struct block_info {
52 ir_valueset_t *exp_gen; /**< The set of expression per block. */
53 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
54 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
55 ir_valueset_t *new_set; /**< The set of all new values for a block. */
56 ir_node *avail; /**< The get_map(avail, block) result. */
57 int not_found; /**< Non-zero, if avail was not found in this block. */
58 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
62 * A pair of nodes that must be exchanged.
63 * We must defer the exchange because our hash-sets cannot
64 * find an already replace node else.
66 typedef struct elim_pair {
67 ir_node *old_node; /**< The old node that will be replaced. */
68 ir_node *new_node; /**< The new node. */
69 struct elim_pair *next; /**< Links all entries in a list. */
70 int reason; /**< The reason for the replacement. */
73 /** The environment for the GVN-PRE algorithm */
74 typedef struct pre_env {
75 struct obstack *obst; /**< The obstack to allocate on. */
76 ir_node *start_block; /**< The start block of the current graph. */
77 ir_node *end_block; /**< The end block of the current graph */
78 block_info *list; /**< Links all block info entires for easier recovery. */
79 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
80 unsigned last_idx; /**< last node index of "old" nodes, all higher indexes are newly created once. */
81 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
82 char first_iter; /**< non-zero for first iteration */
85 static pset *value_table;
86 static ir_nodemap_t value_map;
88 /** The debug module handle. */
89 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
91 /* ---------- Functions for Value sets ---------- */
93 /** computes dst = dst \/ src for value sets */
94 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
96 ir_valueset_iterator_t iter;
97 ir_node *value, *expr;
99 foreach_valueset(src, value, expr, iter) {
100 ir_valueset_insert(dst, value, expr);
105 /* ---------- Functions for Values ---------- */
108 * Add a node e representing the value v to the set.
110 * @param e a node representing an expression
111 * @param v a node representing a value
113 * @return the final value for the expression e
115 static ir_node *add(ir_node *e, ir_node *v)
118 ir_node *pred = get_Proj_pred(v);
119 ir_node *v_pred = identify_remember(value_table, pred);
121 if (v_pred != pred) {
122 /* must create a new value here */
123 v = new_r_Proj(current_ir_graph, get_nodes_block(v_pred), v_pred, get_irn_mode(v), get_Proj_proj(v));
126 v = identify_remember(value_table, v);
127 ir_nodemap_insert(&value_map, e, v);
132 * Lookup a value in a value set.
134 * @param e a node representing an expression
136 * @return a node representing the value or NULL if
137 * the given expression is not available
139 static ir_node *lookup(ir_node *e)
141 ir_node *value = ir_nodemap_get(&value_map, e);
143 return identify_remember(value_table, value);
148 * Return the block info of a block.
150 * @param block the block
152 static block_info *get_block_info(ir_node *block) {
153 return get_irn_link(block);
154 } /* get_block_info */
157 * Allocate a block info for a block.
159 * @param block the block
160 * @param env the environment
162 static void alloc_blk_info(ir_node *block, pre_env *env) {
163 block_info *info = obstack_alloc(env->obst, sizeof(*info));
165 set_irn_link(block, info);
166 info->exp_gen = ir_valueset_new(16);
167 info->avail_out = ir_valueset_new(16);
168 info->antic_in = ir_valueset_new(16);
169 info->new_set = NULL;
172 info->next = env->list;
174 } /* alloc_blk_info */
177 * Returns non-zero if a node is movable and a possible candidate for PRE.
181 static int is_nice_value(ir_node *n) {
185 n = get_Proj_pred(n);
186 if (get_irn_pinned(n) == op_pin_state_pinned)
188 mode = get_irn_mode(n);
189 if (!mode_is_data(mode)) {
190 if (! is_Div(n) && ! is_Mod(n) && ! is_DivMod(n))
192 if (! is_NoMem(get_fragile_op_mem(n)))
196 } /* is_nice_value */
202 * @param set the set to dump
203 * @param txt a text to describe the set
204 * @param block the owner block of the set
206 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block) {
207 ir_valueset_iterator_t iter;
208 ir_node *value, *expr;
211 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
213 foreach_valueset(set, value, expr, iter) {
215 DB((dbg, LEVEL_2, "\n"));
217 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
219 DB((dbg, LEVEL_2, " %+F,", expr));
222 DB((dbg, LEVEL_2, "\n}\n"));
223 } /* dump_value_set */
226 #define dump_value_set(set, txt, block)
227 #endif /* DEBUG_libfirm */
230 * Topological walker. Allocates block info for every block and place nodes in topological
231 * order into the nodes set.
233 static void topo_walker(ir_node *irn, void *ctx) {
240 /* the topological walker ensures that blocks are visited before anything else */
241 alloc_blk_info(irn, env);
244 /* GVN step: remember the value */
245 value = add(irn, irn);
247 /* no need to put constants into the sets: they are always redundant */
248 if (! is_nice_value(irn) || is_irn_constlike(irn))
251 /* Do not put mode_T nodes info the sets, or PhiT will be created
252 (which are not allowed in Firm). Instead, put the Proj's here only. */
253 if (get_irn_mode(irn) == mode_T)
256 /* place this node into the set of possible nodes of its block */
257 block = get_nodes_block(irn);
258 info = get_block_info(block);
260 ir_valueset_insert(info->exp_gen, value, irn);
264 * Computes Avail_out(block):
266 * Avail_in(block) = Avail_out(dom(block))
267 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
270 * This function must be called in the top-down dominance order:
271 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
273 * @param block the block
274 * @param ctx walker context
276 static void compute_avail_top_down(ir_node *block, void *ctx)
279 block_info *dom_info;
280 block_info *info = get_block_info(block);
283 /* we don't need the end block Avail */
284 if (block == env->end_block)
288 * First add all nodes from the dominator.
289 * This must be done to ensure that Antic_out contains the leader
290 * for every node. The root has no dominator.
292 if (block != env->start_block) {
293 dom_blk = get_Block_idom(block);
294 assert(is_Block(dom_blk));
296 dom_info = get_block_info(dom_blk);
299 value_union(info->avail_out, dom_info->avail_out);
301 value_union(info->avail_out, info->exp_gen);
303 dump_value_set(info->avail_out, "Avail_out", block);
304 } /* compute_avail_top_down */
307 * check if a node n is clean in block block.
310 * @param block the block
312 static int _is_clean(ir_node *n, ir_node *block)
316 if (get_nodes_block(n) != block)
324 if (! is_nice_value(n))
326 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
327 ir_node *pred = get_irn_n(n, i);
328 if (! _is_clean(pred, block))
338 * check if a node n is clean.
340 * @param n the node to check
342 static int is_clean(ir_node *n) {
343 int res = _is_clean(n, get_nodes_block(n));
348 * Implements phi_translate. Translate a expression above a Phi.
350 * @param node the node
351 * @param block the block in which the node is translated
352 * @param pos the input number of the destination block
353 * @param env the environment
355 * @return a node representing the translated value
357 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, pre_env *env)
364 if (get_nodes_block(node) == block) {
365 /* a Phi inside our block */
366 return get_Phi_pred(node, pos);
368 /* already outside */
372 arity = get_irn_intra_arity(node);
374 /* check if the node has at least one Phi predecessor */
375 for (i = 0; i < arity; ++i) {
376 ir_node *pred = get_irn_intra_n(node, i);
377 ir_node *leader = lookup(pred);
379 leader = leader != NULL ? leader : pred;
380 if (is_Phi(leader) && get_nodes_block(pred) == block)
384 /* no Phi in the predecessors */
388 /* Create a copy of the node in the pos'th predecessor block.
389 Use our environmental obstack, as these nodes are always
391 old = current_ir_graph->obst;
392 current_ir_graph->obst = env->obst;
394 get_irn_dbg_info(node),
401 /* We need the attribute copy here, because the Hash value of a
402 node might depend on that. */
403 copy_node_attr(node, nn);
405 set_nodes_block(nn, get_nodes_block(node));
406 for (i = 0; i < arity; ++i) {
407 ir_node *pred = get_irn_intra_n(node, i);
408 ir_node *leader = lookup(pred);
410 leader = leader != NULL ? leader : pred;
411 if (is_Phi(leader) && get_irn_intra_n(pred, -1) == block)
412 set_irn_n(nn, i, get_Phi_pred(leader, pos));
414 set_irn_n(nn, i, leader);
416 current_ir_graph->obst = old;
418 } /* phi_translate */
421 * Block-walker, computes Antic_in(block).
423 * @param block the block
424 * @param ctx the walker environment
426 static void compute_antic(ir_node *block, void *ctx) {
428 block_info *succ_info;
429 block_info *info = get_block_info(block);
430 ir_node *succ, *value, *expr;
432 ir_valueset_iterator_t iter;
434 /* no need for computations in start block */
435 if (block == env->start_block)
438 size = ir_valueset_size(info->antic_in);
440 /* the end block has no successor */
441 if (block != env->end_block) {
445 * This step puts all generated expression from the current
446 * current block into Antic_in.
447 * It is enough to do this in the first iteration only, because
448 * the set info->exp_gen is not changed anymore.
450 if (env->first_iter) {
451 foreach_valueset(info->exp_gen, value, expr, iter) {
452 ir_valueset_insert(info->antic_in, value, expr);
456 n_succ = get_Block_n_cfg_outs(block);
460 /* find blocks position in succ's block predecessors */
461 succ = get_Block_cfg_out(block, 0);
462 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
463 if (get_Block_cfgpred_block(succ, i) == block) {
470 succ_info = get_block_info(succ);
471 /* translate into list: we cannot insert into a set we iterate
472 * and succ might be equal to block for endless loops */
473 foreach_valueset(succ_info->antic_in, value, expr, iter) {
474 ir_node *trans = phi_translate(expr, succ, pos, env);
477 ir_valueset_insert(info->antic_in, value, trans);
479 } else { /* n_succ > 1 */
481 block_info *succ0_info;
486 /* Select a successor to compute the disjoint of all Nodes
487 sets, it might be useful to select the block with the
488 smallest number of nodes. For simplicity we choose the
490 succ0 = get_Block_cfg_out(block, 0);
491 succ0_info = get_block_info(succ0);
492 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
493 /* we need the disjoint */
494 for (i = 1; i < n_succ; ++i) {
495 ir_node *succ = get_Block_cfg_out(block, i);
496 block_info *succ_info = get_block_info(succ);
497 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
501 /* we found a value that is common in all Antic_in(succ(b)),
502 put it in Antic_in(b) if the value is NOT already represented. */
503 ir_valueset_insert(info->antic_in, value, expr);
509 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
510 * and all other sets */
512 dump_value_set(info->antic_in, "Antic_in", block);
513 if (size != ir_valueset_size(info->antic_in)) {
514 /* the Antic_in set has changed */
517 } /* compute_antic */
520 * Perform insertion of partially redundant values.
521 * For every Block node, do the following:
522 * 1. Propagate the NEW_SETS of the dominator into the current block.
523 * If the block has multiple predecessors,
524 * 2a. Iterate over the ANTIC expressions for the block to see if
525 * any of them are partially redundant.
526 * 2b. If so, insert them into the necessary predecessors to make
527 * the expression fully redundant.
528 * 2c. Insert a new Phi merging the values of the predecessors.
529 * 2d. Insert the new Phi, and the new expressions, into the
532 * @param block the block
533 * @param ctx the walker environment
535 static void insert_nodes(ir_node *block, void *ctx)
538 ir_node *value, *expr, *idom, *first_s, *worklist;
539 block_info *curr_info, *idom_info;
540 int pos, arity = get_irn_intra_arity(block);
541 int all_same, by_some, updated;
542 ir_valueset_iterator_t iter;
544 /* ensure that even the start block has a new_set */
545 curr_info = get_block_info(block);
546 if (curr_info->new_set)
547 ir_valueset_del(curr_info->new_set);
548 curr_info->new_set = ir_valueset_new(16);
550 if (block == env->start_block)
553 idom = get_Block_idom(block);
554 idom_info = get_block_info(idom);
556 /* update the new_sets */
558 dump_value_set(idom_info->new_set, "[New Set]", idom);
559 foreach_valueset(idom_info->new_set, value, expr, iter) {
560 ir_valueset_insert(curr_info->new_set, value, expr);
561 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
564 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
570 /* convert the set into a list. This allows the removal of
571 * elements from the set */
573 foreach_valueset(curr_info->antic_in, value, expr, iter) {
576 /* If the value was already computed in the dominator, then
577 it is totally redundant. Hence we have nothing to insert. */
578 if (ir_valueset_lookup(idom_info->avail_out, value)) {
579 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
588 /* for all predecessor blocks */
589 for (pos = 0; pos < arity; ++pos) {
590 block_info *pred_info;
591 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
592 ir_node *e_prime, *v_prime, *e_dprime;
594 /* ignore bad blocks. */
595 if (is_Bad(pred_blk))
598 e_prime = phi_translate(expr, block, pos, env);
599 v_prime = lookup(e_prime);
603 pred_info = get_block_info(pred_blk);
604 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
606 if (e_dprime == NULL) {
607 pred_info->avail = e_prime;
608 pred_info->not_found = 1;
611 pred_info->avail = e_dprime;
612 pred_info->not_found = 0;
613 mode = get_irn_mode(e_dprime);
618 else if (first_s != e_dprime)
621 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
625 /* If it's not the same value already existing along every predecessor, and
626 it's defined by some predecessor, it is partially redundant. */
627 if (! all_same && by_some) {
630 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
632 in = xmalloc(arity * sizeof(*in));
633 /* for all predecessor blocks */
634 for (pos = 0; pos < arity; ++pos) {
635 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
636 block_info *pred_info = get_block_info(pred_blk);
638 /* ignore bad blocks. */
639 if (is_Bad(pred_blk)) {
644 /* ignore blocks that already have the expression */
645 if (pred_info->not_found) {
646 ir_node *e_prime = pred_info->avail;
648 if (!is_Phi(e_prime)) {
649 ir_node *proj_pred = NULL;
650 if (is_Proj(e_prime)) {
651 ir_node *pred = get_Proj_pred(e_prime);
652 mode = get_irn_mode(pred);
654 get_irn_dbg_info(pred),
655 current_ir_graph, pred_blk,
659 get_irn_in(pred) + 1);
660 copy_node_attr(pred, nn);
662 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
665 mode = get_irn_mode(e_prime);
667 get_irn_dbg_info(e_prime),
668 current_ir_graph, pred_blk,
671 get_irn_arity(e_prime),
672 get_irn_in(e_prime) + 1);
673 copy_node_attr(e_prime, nn);
674 if (proj_pred != NULL) {
675 set_Proj_pred(nn, proj_pred);
678 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
679 ir_valueset_insert(pred_info->avail_out, add(nn, lookup(expr)), nn);
680 pred_info->avail = nn;
683 in[pos] = pred_info->avail;
685 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
686 value = add(phi, lookup(expr));
687 ir_valueset_replace(curr_info->avail_out, value, phi);
688 ir_valueset_insert(curr_info->new_set, value, phi);
690 DB((dbg, LEVEL_2, "New %+F for redundant %+F created\n", phi, expr));
691 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
694 } /* node_set_foreach */
698 * Walker, change nodes by its value if different.
700 * We cannot do the changes right here, as this would change
701 * the hash values of the nodes in the avail_out set!
703 * @param irn the node
704 * @param ctx the walker environment
706 static void eliminate(ir_node *irn, void *ctx) {
709 if (is_no_Block(irn)) {
710 ir_node *block = get_nodes_block(irn);
711 block_info *bl = get_block_info(block);
712 ir_node *value = lookup(irn);
715 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
717 if (expr != NULL && expr != irn) {
718 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
722 p->next = env->pairs;
723 p->reason = get_irn_idx(expr) >= env->last_idx ? FS_OPT_GVN_PARTLY : FS_OPT_GVN_FULLY;
731 * Do all the recorded changes and optimize
732 * newly created Phi's.
734 * @param pairs list of elimination pairs
736 static void eliminate_nodes(elim_pair *pairs) {
739 for (p = pairs; p != NULL; p = p->next) {
740 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
742 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
743 * which we can optimize here
745 if (is_Phi(p->new_node)) {
749 for (i = get_irn_intra_arity(p->new_node) - 1; i >= 0; --i) {
750 ir_node *pred = get_irn_n(p->new_node, i);
752 if (pred != p->old_node) {
763 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
764 exchange(p->old_node, p->new_node);
766 } /* eliminate_nodes */
769 * Argh: Endless loops cause problems, because the
770 * insert algorithm did not terminate. We get translated nodes that
771 * references the origin. These nodes are translated again and again...
773 * The current fix is to use post-dominance. This simple ignores
774 * endless loops, ie we cannot optimize them.
776 void do_gvn_pre(ir_graph *irg)
780 optimization_state_t state;
782 unsigned antic_iter, insert_iter;
783 ir_node *value, *expr;
785 /* register a debug mask */
786 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
788 /* edges will crash if enabled due to our allocate on other obstack trick */
789 edges_deactivate(irg);
791 value_table = new_identities();
792 ir_nodemap_init(&value_map);
797 a_env.start_block = get_irg_start_block(irg);
798 a_env.end_block = get_irg_end_block(irg);
801 /* Move Proj's into the same block as their args,
802 else we would assign the result to wrong blocks */
803 normalize_proj_nodes(irg);
805 /* critical edges MUST be removed */
806 remove_critical_cf_edges(irg);
808 /* we need dominator for Antic_out calculation */
810 assure_postdoms(irg);
811 /* we get all nodes of a block by following outs */
812 assure_irg_outs(irg);
815 * Switch on GCSE. We need it to correctly compute
816 * the leader of a node by hashing.
818 save_optimization_state(&state);
819 set_opt_global_cse(1);
821 DB((dbg, LEVEL_1, "Doing GVN-PRE for %e\n", get_irg_entity(irg)));
823 /* allocate block info for all blocks */
824 irg_walk_blkwise_graph(irg, NULL, topo_walker, &a_env);
826 /* clean the exp_gen set. Doing this here saves the cleanup in the iteration. */
827 inc_irg_visited(irg);
828 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
829 ir_valueset_iterator_t iter;
831 foreach_valueset(bl_info->exp_gen, value, expr, iter) {
833 ir_valueset_remove_iterator(bl_info->exp_gen, &iter);
837 /* compute the available value sets for all blocks */
838 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
840 /* compute the anticipated value sets for all blocks */
842 a_env.first_iter = 1;
844 /* we use the visited flag to mark non-clean nodes */
845 inc_irg_visited(irg);
847 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
849 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
850 a_env.first_iter = 0;
851 DB((dbg, LEVEL_1, "------------------------\n"));
852 } while (a_env.changes != 0);
854 /* compute redundant expressions */
856 a_env.last_idx = get_irg_last_idx(irg);
858 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
860 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
861 DB((dbg, LEVEL_1, "------------------------\n"));
862 } while (a_env.changes != 0);
864 /* last step: eliminate nodes */
865 irg_walk_graph(irg, NULL, eliminate, &a_env);
866 eliminate_nodes(a_env.pairs);
868 /* clean up: delete all sets */
869 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
870 ir_valueset_del(bl_info->exp_gen);
871 ir_valueset_del(bl_info->avail_out);
872 ir_valueset_del(bl_info->antic_in);
873 if (bl_info->new_set)
874 ir_valueset_del(bl_info->new_set);
876 del_identities(value_table);
877 ir_nodemap_destroy(&value_map);
878 obstack_free(&obst, NULL);
881 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
882 set_irg_pinned(irg, op_pin_state_pinned);
883 restore_optimization_state(&state);
886 set_irg_outs_inconsistent(irg);
887 set_irg_loopinfo_inconsistent(irg);