2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
40 #include "irnodemap.h"
41 #include "irnodeset.h"
43 #include "iropt_dbg.h"
46 #include "irgraph_t.h"
50 /** Additional info we need for every block. */
51 typedef struct block_info {
52 ir_valueset_t *exp_gen; /**< The set of expression per block. */
53 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
54 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
55 ir_valueset_t *new_set; /**< The set of all new values for a block. */
56 ir_node *avail; /**< The get_map(avail, block) result. */
57 ir_node *block; /**< The Block of the block info. */
58 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
59 int not_found; /**< Non-zero, if avail was not found in this block. */
63 * A pair of nodes that must be exchanged.
64 * We must defer the exchange because our hash-sets cannot
65 * find an already replace node else.
67 typedef struct elim_pair {
68 ir_node *old_node; /**< The old node that will be replaced. */
69 ir_node *new_node; /**< The new node. */
70 struct elim_pair *next; /**< Links all entries in a list. */
71 int reason; /**< The reason for the replacement. */
74 /** The environment for the GVN-PRE algorithm */
75 typedef struct pre_env {
76 struct obstack *obst; /**< The obstack to allocate on. */
77 ir_node *start_block; /**< The start block of the current graph. */
78 ir_node *end_block; /**< The end block of the current graph */
79 block_info *list; /**< Links all block info entires for easier recovery. */
80 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
81 unsigned last_idx; /**< last node index of "old" nodes, all higher indexes are newly created once. */
82 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
83 char first_iter; /**< non-zero for first iteration */
86 static pset *value_table;
87 static ir_nodemap_t value_map;
89 /** The debug module handle. */
90 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
92 /* ---------- Functions for Value sets ---------- */
94 /** computes dst = dst \/ src for value sets */
95 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
97 ir_valueset_iterator_t iter;
98 ir_node *value, *expr;
100 foreach_valueset(src, value, expr, iter) {
101 ir_valueset_insert(dst, value, expr);
106 /* ---------- Functions for Values ---------- */
109 * Add a node e representing the value v to the set.
111 * @param e a node representing an expression
112 * @param v a node representing a value
114 * @return the final value for the expression e
116 static ir_node *add(ir_node *e, ir_node *v)
119 ir_node *pred = get_Proj_pred(v);
120 ir_node *v_pred = identify_remember(value_table, pred);
122 if (v_pred != pred) {
123 /* must create a new value here */
124 v = new_r_Proj(current_ir_graph, get_nodes_block(v_pred), v_pred, get_irn_mode(v), get_Proj_proj(v));
127 v = identify_remember(value_table, v);
128 ir_nodemap_insert(&value_map, e, v);
133 * Lookup a value in a value set.
135 * @param e a node representing an expression
137 * @return a node representing the value or NULL if
138 * the given expression is not available
140 static ir_node *lookup(ir_node *e)
142 ir_node *value = ir_nodemap_get(&value_map, e);
144 return identify_remember(value_table, value);
149 * Return the block info of a block.
151 * @param block the block
153 static block_info *get_block_info(ir_node *block) {
154 return get_irn_link(block);
155 } /* get_block_info */
158 * Allocate a block info for a block.
160 * @param block the block
161 * @param env the environment
163 static void alloc_blk_info(ir_node *block, pre_env *env) {
164 block_info *info = obstack_alloc(env->obst, sizeof(*info));
166 set_irn_link(block, info);
167 info->exp_gen = ir_valueset_new(16);
168 info->avail_out = ir_valueset_new(16);
169 info->antic_in = ir_valueset_new(16);
170 info->new_set = NULL;
173 info->next = env->list;
176 } /* alloc_blk_info */
179 * Returns non-zero if a node is movable and a possible candidate for PRE.
183 static int is_nice_value(ir_node *n) {
187 n = get_Proj_pred(n);
188 if (get_irn_pinned(n) == op_pin_state_pinned)
190 mode = get_irn_mode(n);
191 if (!mode_is_data(mode)) {
192 if (! is_Div(n) && ! is_Mod(n) && ! is_DivMod(n))
194 if (! is_NoMem(get_fragile_op_mem(n)))
198 } /* is_nice_value */
204 * @param set the set to dump
205 * @param txt a text to describe the set
206 * @param block the owner block of the set
208 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block) {
209 ir_valueset_iterator_t iter;
210 ir_node *value, *expr;
213 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
215 foreach_valueset(set, value, expr, iter) {
217 DB((dbg, LEVEL_2, "\n"));
219 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
221 DB((dbg, LEVEL_2, " %+F,", expr));
224 DB((dbg, LEVEL_2, "\n}\n"));
225 } /* dump_value_set */
228 #define dump_value_set(set, txt, block)
229 #endif /* DEBUG_libfirm */
232 * Topological walker. Allocates block info for every block and place nodes in topological
233 * order into the nodes set.
235 static void topo_walker(ir_node *irn, void *ctx) {
242 /* the topological walker ensures that blocks are visited before anything else */
243 alloc_blk_info(irn, env);
246 /* GVN step: remember the value */
247 value = add(irn, irn);
249 /* no need to put constants into the sets: they are always redundant */
250 if (! is_nice_value(irn) || is_irn_constlike(irn))
253 /* Do not put mode_T nodes info the sets, or PhiT will be created
254 (which are not allowed in Firm). Instead, put the Proj's here only. */
255 if (get_irn_mode(irn) == mode_T)
258 /* place this node into the set of possible nodes of its block */
259 block = get_nodes_block(irn);
260 info = get_block_info(block);
262 ir_valueset_insert(info->exp_gen, value, irn);
266 * Computes Avail_out(block):
268 * Avail_in(block) = Avail_out(dom(block))
269 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
272 * This function must be called in the top-down dominance order:
273 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
275 * @param block the block
276 * @param ctx walker context
278 static void compute_avail_top_down(ir_node *block, void *ctx)
281 block_info *dom_info;
282 block_info *info = get_block_info(block);
285 /* we don't need the end block Avail */
286 if (block == env->end_block)
290 * First add all nodes from the dominator.
291 * This must be done to ensure that Antic_out contains the leader
292 * for every node. The root has no dominator.
294 if (block != env->start_block) {
295 dom_blk = get_Block_idom(block);
296 assert(is_Block(dom_blk));
298 dom_info = get_block_info(dom_blk);
301 value_union(info->avail_out, dom_info->avail_out);
303 value_union(info->avail_out, info->exp_gen);
305 dump_value_set(info->avail_out, "Avail_out", block);
306 } /* compute_avail_top_down */
309 * check if a node n is clean in block block.
312 * @param block the block
313 * @param set a value set, containing the already processed predecessors
315 static int is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *set) {
321 if (! is_nice_value(n))
324 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
325 ir_node *pred = get_irn_n(n, i);
328 if (get_nodes_block(pred) != block)
334 if (! is_nice_value(pred))
337 value = lookup(pred);
340 if (! ir_valueset_lookup(set, value))
344 } /* is_clean_in_block */
347 * Implements phi_translate. Translate a expression above a Phi.
349 * @param node the node
350 * @param block the block in which the node is translated
351 * @param pos the input number of the destination block
352 * @param translated the valueset containing the other already translated nodes
354 * @return a node representing the translated value
356 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *translated)
362 if (get_nodes_block(node) == block) {
363 /* a Phi inside our block */
364 return get_Phi_pred(node, pos);
366 /* already outside */
370 arity = get_irn_intra_arity(node);
372 /* check if the node has at least one Phi predecessor */
373 for (i = 0; i < arity; ++i) {
374 ir_node *pred = get_irn_intra_n(node, i);
375 ir_node *leader = lookup(pred);
378 leader = leader != NULL ? leader : pred;
379 trans = ir_valueset_lookup(translated, leader);
381 if ((trans != NULL && trans != leader) || (is_Phi(leader) && get_nodes_block(leader) == block))
385 /* no translation needed */
390 get_irn_dbg_info(node),
397 /* We need the attribute copy here, because the Hash value of a
398 node might depend on that. */
399 copy_node_attr(node, nn);
401 set_nodes_block(nn, get_nodes_block(node));
402 for (i = 0; i < arity; ++i) {
403 ir_node *pred = get_irn_intra_n(node, i);
404 ir_node *leader = lookup(pred);
407 leader = leader != NULL ? leader : pred;
408 trans = ir_valueset_lookup(translated, leader);
412 if (is_Phi(trans) && get_nodes_block(trans) == block)
413 set_irn_n(nn, i, get_Phi_pred(trans, pos));
415 set_irn_n(nn, i, trans);
417 nn = optimize_node(nn);
419 } /* phi_translate */
422 * Block-walker, computes Antic_in(block).
424 * @param block the block
425 * @param ctx the walker environment
427 static void compute_antic(ir_node *block, void *ctx) {
429 block_info *succ_info;
430 block_info *info = get_block_info(block);
431 ir_node *succ, *value, *expr;
433 ir_valueset_iterator_t iter;
435 /* no need for computations in start block */
436 if (block == env->start_block)
439 size = ir_valueset_size(info->antic_in);
441 /* the end block has no successor */
442 if (block != env->end_block) {
446 * This step puts all generated expression from the current
447 * current block into Antic_in.
448 * It is enough to do this in the first iteration only, because
449 * the set info->exp_gen is not changed anymore.
451 if (env->first_iter) {
452 foreach_valueset(info->exp_gen, value, expr, iter) {
453 ir_valueset_insert(info->antic_in, value, expr);
457 n_succ = get_Block_n_cfg_outs(block);
461 /* find blocks position in succ's block predecessors */
462 succ = get_Block_cfg_out(block, 0);
463 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
464 if (get_Block_cfgpred_block(succ, i) == block) {
471 succ_info = get_block_info(succ);
472 /* translate into list: we cannot insert into a set we iterate
473 * and succ might be equal to block for endless loops */
474 foreach_valueset(succ_info->antic_in, value, expr, iter) {
475 ir_node *trans = phi_translate(expr, succ, pos, info->antic_in);
477 if (is_clean_in_block(trans, block, info->antic_in))
478 ir_valueset_insert(info->antic_in, value, trans);
480 } else { /* n_succ > 1 */
482 block_info *succ0_info;
487 /* Select a successor to compute the disjoint of all Nodes
488 sets, it might be useful to select the block with the
489 smallest number of nodes. For simplicity we choose the
491 succ0 = get_Block_cfg_out(block, 0);
492 succ0_info = get_block_info(succ0);
493 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
494 /* we need the disjoint */
495 for (i = 1; i < n_succ; ++i) {
496 ir_node *succ = get_Block_cfg_out(block, i);
497 block_info *succ_info = get_block_info(succ);
498 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
502 /* we found a value that is common in all Antic_in(succ(b)),
503 put it in Antic_in(b) if the value is NOT already represented. */
504 if (is_clean_in_block(expr, block, info->antic_in))
505 ir_valueset_insert(info->antic_in, value, expr);
511 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
512 * and all other sets */
514 dump_value_set(info->antic_in, "Antic_in", block);
515 if (size != ir_valueset_size(info->antic_in)) {
516 /* the Antic_in set has changed */
519 } /* compute_antic */
522 * Perform insertion of partially redundant values.
523 * For every Block node, do the following:
524 * 1. Propagate the NEW_SETS of the dominator into the current block.
525 * If the block has multiple predecessors,
526 * 2a. Iterate over the ANTIC expressions for the block to see if
527 * any of them are partially redundant.
528 * 2b. If so, insert them into the necessary predecessors to make
529 * the expression fully redundant.
530 * 2c. Insert a new Phi merging the values of the predecessors.
531 * 2d. Insert the new Phi, and the new expressions, into the
534 * @param block the block
535 * @param ctx the walker environment
537 static void insert_nodes(ir_node *block, void *ctx)
540 ir_node *value, *expr, *idom, *first_s, *worklist;
541 block_info *curr_info, *idom_info;
542 int pos, arity = get_irn_intra_arity(block);
543 int all_same, by_some, updated;
544 ir_valueset_iterator_t iter;
546 /* ensure that even the start block has a new_set */
547 curr_info = get_block_info(block);
548 if (curr_info->new_set)
549 ir_valueset_del(curr_info->new_set);
550 curr_info->new_set = ir_valueset_new(16);
552 if (block == env->start_block)
555 idom = get_Block_idom(block);
556 idom_info = get_block_info(idom);
558 /* update the new_sets */
560 dump_value_set(idom_info->new_set, "[New Set]", idom);
561 foreach_valueset(idom_info->new_set, value, expr, iter) {
562 ir_valueset_insert(curr_info->new_set, value, expr);
563 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
566 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
572 /* convert the set into a list. This allows the removal of
573 * elements from the set */
575 foreach_valueset(curr_info->antic_in, value, expr, iter) {
578 /* If the value was already computed in the dominator, then
579 it is totally redundant. Hence we have nothing to insert. */
580 if (ir_valueset_lookup(idom_info->avail_out, value)) {
581 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
590 /* for all predecessor blocks */
591 for (pos = 0; pos < arity; ++pos) {
592 block_info *pred_info;
593 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
594 ir_node *e_prime, *v_prime, *e_dprime;
596 /* ignore bad blocks. */
597 if (is_Bad(pred_blk))
600 e_prime = phi_translate(expr, block, pos, curr_info->avail_out);
601 v_prime = lookup(e_prime);
605 pred_info = get_block_info(pred_blk);
606 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
608 if (e_dprime == NULL) {
609 pred_info->avail = e_prime;
610 pred_info->not_found = 1;
613 pred_info->avail = e_dprime;
614 pred_info->not_found = 0;
615 mode = get_irn_mode(e_dprime);
620 else if (first_s != e_dprime)
623 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
627 /* If it's not the same value already existing along every predecessor, and
628 it's defined by some predecessor, it is partially redundant. */
629 if (! all_same && by_some) {
630 ir_node *phi, *l, **in;
632 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
634 in = xmalloc(arity * sizeof(*in));
635 /* for all predecessor blocks */
636 for (pos = 0; pos < arity; ++pos) {
637 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
638 block_info *pred_info = get_block_info(pred_blk);
640 /* ignore bad blocks. */
641 if (is_Bad(pred_blk)) {
646 /* ignore blocks that already have the expression */
647 if (pred_info->not_found) {
648 ir_node *e_prime = pred_info->avail;
650 if (!is_Phi(e_prime)) {
651 ir_node *proj_pred = NULL;
652 if (is_Proj(e_prime)) {
653 ir_node *pred = get_Proj_pred(e_prime);
654 mode = get_irn_mode(pred);
656 get_irn_dbg_info(pred),
657 current_ir_graph, pred_blk,
661 get_irn_in(pred) + 1);
662 copy_node_attr(pred, nn);
664 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
667 mode = get_irn_mode(e_prime);
669 get_irn_dbg_info(e_prime),
670 current_ir_graph, pred_blk,
673 get_irn_arity(e_prime),
674 get_irn_in(e_prime) + 1);
675 copy_node_attr(e_prime, nn);
676 if (proj_pred != NULL) {
677 set_Proj_pred(nn, proj_pred);
680 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
683 l = add(expr, value);
685 ir_valueset_insert(pred_info->avail_out, add(nn, l), nn);
686 pred_info->avail = nn;
689 in[pos] = pred_info->avail;
691 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
694 l = add(expr, value);
697 ir_valueset_replace(curr_info->avail_out, value, phi);
698 ir_valueset_insert(curr_info->new_set, value, phi);
700 DB((dbg, LEVEL_1, "New %+F for redundant %+F created\n", phi, expr));
701 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
704 } /* node_set_foreach */
708 * Walker, change nodes by its value if different.
710 * We cannot do the changes right here, as this would change
711 * the hash values of the nodes in the avail_out set!
713 * @param irn the node
714 * @param ctx the walker environment
716 static void eliminate(ir_node *irn, void *ctx) {
719 if (is_no_Block(irn)) {
720 ir_node *block = get_nodes_block(irn);
721 block_info *bl = get_block_info(block);
722 ir_node *value = lookup(irn);
725 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
727 if (expr != NULL && expr != irn) {
728 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
732 p->next = env->pairs;
733 p->reason = get_irn_idx(expr) >= env->last_idx ? FS_OPT_GVN_PARTLY : FS_OPT_GVN_FULLY;
741 * Do all the recorded changes and optimize
742 * newly created Phi's.
744 * @param pairs list of elimination pairs
746 static void eliminate_nodes(elim_pair *pairs) {
749 for (p = pairs; p != NULL; p = p->next) {
750 /* might be already changed */
751 p->new_node = skip_Id(p->new_node);
753 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
755 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
756 * which we can optimize here
758 if (is_Phi(p->new_node)) {
762 for (i = get_irn_intra_arity(p->new_node) - 1; i >= 0; --i) {
763 ir_node *pred = get_irn_n(p->new_node, i);
765 if (pred != p->old_node) {
774 exchange(p->new_node, res);
778 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
779 exchange(p->old_node, p->new_node);
781 } /* eliminate_nodes */
784 * Argh: Endless loops cause problems, because the
785 * insert algorithm did not terminate. We get translated nodes that
786 * references the origin. These nodes are translated again and again...
788 * The current fix is to use post-dominance. This simple ignores
789 * endless loops, ie we cannot optimize them.
791 void do_gvn_pre(ir_graph *irg)
795 optimization_state_t state;
797 unsigned antic_iter, insert_iter;
798 ir_node *value, *expr;
800 /* register a debug mask */
801 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
802 firm_dbg_set_mask(dbg, 3);
804 /* edges will crash if enabled due to our allocate on other obstack trick */
805 edges_deactivate(irg);
807 value_table = new_identities();
808 ir_nodemap_init(&value_map);
813 a_env.start_block = get_irg_start_block(irg);
814 a_env.end_block = get_irg_end_block(irg);
817 /* Move Proj's into the same block as their args,
818 else we would assign the result to wrong blocks */
819 normalize_proj_nodes(irg);
821 /* critical edges MUST be removed */
822 remove_critical_cf_edges(irg);
824 /* we need dominator for Antic_out calculation */
826 assure_postdoms(irg);
827 /* we get all nodes of a block by following outs */
828 assure_irg_outs(irg);
831 * Switch on GCSE. We need it to correctly compute
832 * the leader of a node by hashing.
834 save_optimization_state(&state);
835 set_opt_global_cse(1);
837 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
839 /* allocate block info for all blocks */
840 irg_walk_blkwise_graph(irg, NULL, topo_walker, &a_env);
842 /* clean the exp_gen set. Doing this here saves the cleanup in the iteration. */
843 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
844 ir_valueset_iterator_t iter;
846 foreach_valueset(bl_info->exp_gen, value, expr, iter) {
847 if (!is_clean_in_block(expr, bl_info->block, bl_info->exp_gen))
848 ir_valueset_remove_iterator(bl_info->exp_gen, &iter);
851 /* compute the available value sets for all blocks */
852 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
854 /* compute the anticipated value sets for all blocks */
856 a_env.first_iter = 1;
858 /* we use the visited flag to mark non-clean nodes */
859 inc_irg_visited(irg);
861 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
863 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
864 a_env.first_iter = 0;
865 DB((dbg, LEVEL_1, "------------------------\n"));
866 } while (a_env.changes != 0);
868 /* compute redundant expressions */
870 a_env.last_idx = get_irg_last_idx(irg);
872 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
874 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
875 DB((dbg, LEVEL_1, "------------------------\n"));
876 } while (a_env.changes != 0);
878 /* last step: eliminate nodes */
879 irg_walk_graph(irg, NULL, eliminate, &a_env);
880 eliminate_nodes(a_env.pairs);
882 /* clean up: delete all sets */
883 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
884 ir_valueset_del(bl_info->exp_gen);
885 ir_valueset_del(bl_info->avail_out);
886 ir_valueset_del(bl_info->antic_in);
887 if (bl_info->new_set)
888 ir_valueset_del(bl_info->new_set);
890 del_identities(value_table);
891 ir_nodemap_destroy(&value_map);
892 obstack_free(&obst, NULL);
895 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
896 set_irg_pinned(irg, op_pin_state_pinned);
897 restore_optimization_state(&state);
900 set_irg_outs_inconsistent(irg);
901 set_irg_loopinfo_inconsistent(irg);