2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
38 #include "irnodemap.h"
39 #include "irnodeset.h"
41 #include "iropt_dbg.h"
45 #include "irgraph_t.h"
49 /** Additional info we need for every block. */
50 typedef struct block_info {
51 ir_valueset_t *exp_gen; /**< The set of expression per block. */
52 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
53 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
54 ir_valueset_t *new_set; /**< The set of all new values for a block. */
55 ir_node *avail; /**< The get_map(avail, block) result. */
56 ir_node *block; /**< The Block of the block info. */
57 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
58 int not_found; /**< Non-zero, if avail was not found in this block. */
62 * A pair of nodes that must be exchanged.
63 * We must defer the exchange because our hash-sets cannot
64 * find an already replace node else.
66 typedef struct elim_pair {
67 ir_node *old_node; /**< The old node that will be replaced. */
68 ir_node *new_node; /**< The new node. */
69 struct elim_pair *next; /**< Links all entries in a list. */
70 int reason; /**< The reason for the replacement. */
73 /** The environment for the GVN-PRE algorithm */
74 typedef struct pre_env {
75 struct obstack *obst; /**< The obstack to allocate on. */
76 ir_node *start_block; /**< The start block of the current graph. */
77 ir_node *end_block; /**< The end block of the current graph */
78 block_info *list; /**< Links all block info entries for easier recovery. */
79 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
80 unsigned last_idx; /**< last node index of "old" nodes, all higher indexes are newly created once. */
81 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
82 char first_iter; /**< non-zero for first iteration */
85 static pset *value_table;
86 static ir_nodemap_t value_map;
88 /** The debug module handle. */
89 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
91 /* ---------- Functions for Value sets ---------- */
93 /** computes dst = dst \/ src for value sets */
94 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
96 ir_valueset_iterator_t iter;
97 ir_node *value, *expr;
99 foreach_valueset(src, value, expr, iter) {
100 ir_valueset_insert(dst, value, expr);
105 /* ---------- Functions for Values ---------- */
108 * Add a node e representing the value v to the set.
110 * @param e a node representing an expression
111 * @param v a node representing a value
113 * @return the final value for the expression e
115 static ir_node *add(ir_node *e, ir_node *v)
118 ir_node *pred = get_Proj_pred(v);
119 ir_node *v_pred = identify_remember(value_table, pred);
121 if (v_pred != pred) {
122 /* must create a new value here */
123 v = new_r_Proj(v_pred, get_irn_mode(v), get_Proj_proj(v));
126 v = identify_remember(value_table, v);
127 ir_nodemap_insert(&value_map, e, v);
132 * Lookup a value in a value set.
134 * @param e a node representing an expression
136 * @return a node representing the value or NULL if
137 * the given expression is not available
139 static ir_node *lookup(ir_node *e)
141 ir_node *value = ir_nodemap_get(&value_map, e);
143 return identify_remember(value_table, value);
148 * Return the block info of a block.
150 * @param block the block
152 static block_info *get_block_info(ir_node *block)
154 return get_irn_link(block);
155 } /* get_block_info */
158 * Allocate a block info for a block.
160 * @param block the block
161 * @param env the environment
163 static void alloc_blk_info(ir_node *block, pre_env *env)
165 block_info *info = OALLOC(env->obst, block_info);
167 set_irn_link(block, info);
168 info->exp_gen = ir_valueset_new(16);
169 info->avail_out = ir_valueset_new(16);
170 info->antic_in = ir_valueset_new(16);
171 info->new_set = NULL;
174 info->next = env->list;
177 } /* alloc_blk_info */
180 * Returns non-zero if a node is movable and a possible candidate for PRE.
184 static int is_nice_value(ir_node *n)
189 n = get_Proj_pred(n);
190 if (get_irn_pinned(n) == op_pin_state_pinned)
192 mode = get_irn_mode(n);
193 if (!mode_is_data(mode)) {
194 if (! is_Div(n) && ! is_Mod(n) && ! is_DivMod(n))
196 if (! is_NoMem(get_fragile_op_mem(n)))
200 } /* is_nice_value */
206 * @param set the set to dump
207 * @param txt a text to describe the set
208 * @param block the owner block of the set
210 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block)
212 ir_valueset_iterator_t iter;
213 ir_node *value, *expr;
216 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
218 foreach_valueset(set, value, expr, iter) {
220 DB((dbg, LEVEL_2, "\n"));
222 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
224 DB((dbg, LEVEL_2, " %+F,", expr));
227 DB((dbg, LEVEL_2, "\n}\n"));
228 } /* dump_value_set */
231 #define dump_value_set(set, txt, block)
232 #endif /* DEBUG_libfirm */
235 * Topological walker. Allocates block info for every block and place nodes in topological
236 * order into the nodes set.
238 static void topo_walker(ir_node *irn, void *ctx)
246 /* the topological walker ensures that blocks are visited before anything else */
247 alloc_blk_info(irn, env);
250 /* GVN step: remember the value */
251 value = add(irn, irn);
253 /* no need to put constants into the sets: they are always redundant */
254 if (! is_nice_value(irn) || is_irn_constlike(irn))
257 /* Do not put mode_T nodes info the sets, or PhiT will be created
258 (which are not allowed in Firm). Instead, put the Proj's here only. */
259 if (get_irn_mode(irn) == mode_T)
262 /* place this node into the set of possible nodes of its block */
263 block = get_nodes_block(irn);
264 info = get_block_info(block);
266 ir_valueset_insert(info->exp_gen, value, irn);
270 * Computes Avail_out(block):
272 * Avail_in(block) = Avail_out(dom(block))
273 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
276 * This function must be called in the top-down dominance order:
277 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
279 * @param block the block
280 * @param ctx walker context
282 static void compute_avail_top_down(ir_node *block, void *ctx)
285 block_info *dom_info;
286 block_info *info = get_block_info(block);
289 /* we don't need the end block Avail */
290 if (block == env->end_block)
294 * First add all nodes from the dominator.
295 * This must be done to ensure that Antic_out contains the leader
296 * for every node. The root has no dominator.
298 if (block != env->start_block) {
299 dom_blk = get_Block_idom(block);
300 assert(is_Block(dom_blk));
302 dom_info = get_block_info(dom_blk);
305 value_union(info->avail_out, dom_info->avail_out);
307 value_union(info->avail_out, info->exp_gen);
309 dump_value_set(info->avail_out, "Avail_out", block);
310 } /* compute_avail_top_down */
313 * check if a node n is clean in block block.
316 * @param block the block
317 * @param set a value set, containing the already processed predecessors
319 static int is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *set)
326 if (! is_nice_value(n))
329 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
330 ir_node *pred = get_irn_n(n, i);
333 if (get_nodes_block(pred) != block)
339 if (! is_nice_value(pred))
342 value = lookup(pred);
345 if (! ir_valueset_lookup(set, value))
349 } /* is_clean_in_block */
352 * Implements phi_translate. Translate a expression above a Phi.
354 * @param node the node
355 * @param block the block in which the node is translated
356 * @param pos the input number of the destination block
357 * @param translated the valueset containing the other already translated nodes
359 * @return a node representing the translated value
361 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *translated)
367 if (get_nodes_block(node) == block) {
368 /* a Phi inside our block */
369 return get_Phi_pred(node, pos);
371 /* already outside */
375 arity = get_irn_intra_arity(node);
377 /* check if the node has at least one Phi predecessor */
378 for (i = 0; i < arity; ++i) {
379 ir_node *pred = get_irn_intra_n(node, i);
380 ir_node *leader = lookup(pred);
383 leader = leader != NULL ? leader : pred;
384 trans = ir_valueset_lookup(translated, leader);
386 if ((trans != NULL && trans != leader) || (is_Phi(leader) && get_nodes_block(leader) == block))
390 /* no translation needed */
395 get_irn_dbg_info(node),
402 /* We need the attribute copy here, because the Hash value of a
403 node might depend on that. */
404 copy_node_attr(current_ir_graph, node, nn);
406 set_nodes_block(nn, get_nodes_block(node));
407 for (i = 0; i < arity; ++i) {
408 ir_node *pred = get_irn_intra_n(node, i);
409 ir_node *leader = lookup(pred);
412 leader = leader != NULL ? leader : pred;
413 trans = ir_valueset_lookup(translated, leader);
417 if (is_Phi(trans) && get_nodes_block(trans) == block)
418 set_irn_n(nn, i, get_Phi_pred(trans, pos));
420 set_irn_n(nn, i, trans);
422 nn = optimize_node(nn);
424 } /* phi_translate */
427 * Block-walker, computes Antic_in(block).
429 * @param block the block
430 * @param ctx the walker environment
432 static void compute_antic(ir_node *block, void *ctx)
435 block_info *succ_info;
436 block_info *info = get_block_info(block);
437 ir_node *succ, *value, *expr;
439 ir_valueset_iterator_t iter;
441 /* no need for computations in start block */
442 if (block == env->start_block)
445 size = ir_valueset_size(info->antic_in);
447 /* the end block has no successor */
448 if (block != env->end_block) {
452 * This step puts all generated expression from the current
453 * current block into Antic_in.
454 * It is enough to do this in the first iteration only, because
455 * the set info->exp_gen is not changed anymore.
457 if (env->first_iter) {
458 foreach_valueset(info->exp_gen, value, expr, iter) {
459 ir_valueset_insert(info->antic_in, value, expr);
463 n_succ = get_Block_n_cfg_outs(block);
467 /* find blocks position in succ's block predecessors */
468 succ = get_Block_cfg_out(block, 0);
469 pos = get_Block_cfgpred_pos(succ, block);
472 succ_info = get_block_info(succ);
473 /* translate into list: we cannot insert into a set we iterate
474 * and succ might be equal to block for endless loops */
475 foreach_valueset(succ_info->antic_in, value, expr, iter) {
476 ir_node *trans = phi_translate(expr, succ, pos, info->antic_in);
478 if (is_clean_in_block(trans, block, info->antic_in))
479 ir_valueset_insert(info->antic_in, value, trans);
481 } else { /* n_succ > 1 */
483 block_info *succ0_info;
488 /* Select a successor to compute the disjoint of all Nodes
489 sets, it might be useful to select the block with the
490 smallest number of nodes. For simplicity we choose the
492 succ0 = get_Block_cfg_out(block, 0);
493 succ0_info = get_block_info(succ0);
494 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
495 /* we need the disjoint */
496 for (i = 1; i < n_succ; ++i) {
497 ir_node *succ = get_Block_cfg_out(block, i);
498 block_info *succ_info = get_block_info(succ);
499 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
503 /* we found a value that is common in all Antic_in(succ(b)),
504 put it in Antic_in(b) if the value is NOT already represented. */
505 if (is_clean_in_block(expr, block, info->antic_in))
506 ir_valueset_insert(info->antic_in, value, expr);
512 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
513 * and all other sets */
515 dump_value_set(info->antic_in, "Antic_in", block);
516 if (size != ir_valueset_size(info->antic_in)) {
517 /* the Antic_in set has changed */
520 } /* compute_antic */
523 * Perform insertion of partially redundant values.
524 * For every Block node, do the following:
525 * 1. Propagate the NEW_SETS of the dominator into the current block.
526 * If the block has multiple predecessors,
527 * 2a. Iterate over the ANTIC expressions for the block to see if
528 * any of them are partially redundant.
529 * 2b. If so, insert them into the necessary predecessors to make
530 * the expression fully redundant.
531 * 2c. Insert a new Phi merging the values of the predecessors.
532 * 2d. Insert the new Phi, and the new expressions, into the
535 * @param block the block
536 * @param ctx the walker environment
538 static void insert_nodes(ir_node *block, void *ctx)
541 ir_node *value, *expr, *idom, *first_s, *worklist;
542 block_info *curr_info, *idom_info;
543 int pos, arity = get_irn_intra_arity(block);
544 int all_same, by_some, updated;
545 ir_valueset_iterator_t iter;
547 /* ensure that even the start block has a new_set */
548 curr_info = get_block_info(block);
549 if (curr_info->new_set)
550 ir_valueset_del(curr_info->new_set);
551 curr_info->new_set = ir_valueset_new(16);
553 if (block == env->start_block)
556 idom = get_Block_idom(block);
557 idom_info = get_block_info(idom);
559 /* update the new_sets */
561 dump_value_set(idom_info->new_set, "[New Set]", idom);
562 foreach_valueset(idom_info->new_set, value, expr, iter) {
563 ir_valueset_insert(curr_info->new_set, value, expr);
564 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
567 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
573 /* convert the set into a list. This allows the removal of
574 * elements from the set */
576 foreach_valueset(curr_info->antic_in, value, expr, iter) {
579 /* If the value was already computed in the dominator, then
580 it is totally redundant. Hence we have nothing to insert. */
581 if (ir_valueset_lookup(idom_info->avail_out, value)) {
582 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
591 /* for all predecessor blocks */
592 for (pos = 0; pos < arity; ++pos) {
593 block_info *pred_info;
594 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
595 ir_node *e_prime, *v_prime, *e_dprime;
597 /* ignore bad blocks. */
598 if (is_Bad(pred_blk))
601 e_prime = phi_translate(expr, block, pos, curr_info->avail_out);
602 v_prime = lookup(e_prime);
606 pred_info = get_block_info(pred_blk);
607 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
609 if (e_dprime == NULL) {
610 pred_info->avail = e_prime;
611 pred_info->not_found = 1;
614 pred_info->avail = e_dprime;
615 pred_info->not_found = 0;
616 mode = get_irn_mode(e_dprime);
621 else if (first_s != e_dprime)
624 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
628 /* If it's not the same value already existing along every predecessor, and
629 it's defined by some predecessor, it is partially redundant. */
630 if (! all_same && by_some) {
631 ir_node *phi, *l, **in;
633 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
635 in = XMALLOCN(ir_node*, arity);
636 /* for all predecessor blocks */
637 for (pos = 0; pos < arity; ++pos) {
638 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
639 block_info *pred_info = get_block_info(pred_blk);
641 /* ignore bad blocks. */
642 if (is_Bad(pred_blk)) {
647 /* ignore blocks that already have the expression */
648 if (pred_info->not_found) {
649 ir_node *e_prime = pred_info->avail;
651 if (!is_Phi(e_prime)) {
652 ir_node *proj_pred = NULL;
653 if (is_Proj(e_prime)) {
654 ir_node *pred = get_Proj_pred(e_prime);
655 mode = get_irn_mode(pred);
657 get_irn_dbg_info(pred),
658 current_ir_graph, pred_blk,
662 get_irn_in(pred) + 1);
663 copy_node_attr(current_ir_graph, pred, nn);
665 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
668 mode = get_irn_mode(e_prime);
670 get_irn_dbg_info(e_prime),
671 current_ir_graph, pred_blk,
674 get_irn_arity(e_prime),
675 get_irn_in(e_prime) + 1);
676 copy_node_attr(current_ir_graph, e_prime, nn);
677 if (proj_pred != NULL) {
678 set_Proj_pred(nn, proj_pred);
681 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
684 l = add(expr, value);
686 ir_valueset_insert(pred_info->avail_out, add(nn, l), nn);
687 pred_info->avail = nn;
690 in[pos] = pred_info->avail;
692 phi = new_r_Phi(block, arity, in, mode);
695 l = add(expr, value);
698 ir_valueset_replace(curr_info->avail_out, value, phi);
699 ir_valueset_insert(curr_info->new_set, value, phi);
701 DB((dbg, LEVEL_1, "New %+F for redundant %+F created\n", phi, expr));
702 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
705 } /* node_set_foreach */
709 * Walker, change nodes by its value if different.
711 * We cannot do the changes right here, as this would change
712 * the hash values of the nodes in the avail_out set!
714 * @param irn the node
715 * @param ctx the walker environment
717 static void eliminate(ir_node *irn, void *ctx)
721 if (is_no_Block(irn)) {
722 ir_node *block = get_nodes_block(irn);
723 block_info *bl = get_block_info(block);
724 ir_node *value = lookup(irn);
727 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
729 if (expr != NULL && expr != irn) {
730 elim_pair *p = OALLOC(env->obst, elim_pair);
734 p->next = env->pairs;
735 p->reason = get_irn_idx(expr) >= env->last_idx ? FS_OPT_GVN_PARTLY : FS_OPT_GVN_FULLY;
743 * Do all the recorded changes and optimize
744 * newly created Phi's.
746 * @param pairs list of elimination pairs
748 static void eliminate_nodes(elim_pair *pairs)
752 for (p = pairs; p != NULL; p = p->next) {
753 /* might be already changed */
754 p->new_node = skip_Id(p->new_node);
756 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
758 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
759 * which we can optimize here
761 if (is_Phi(p->new_node)) {
765 for (i = get_irn_intra_arity(p->new_node) - 1; i >= 0; --i) {
766 ir_node *pred = get_irn_n(p->new_node, i);
768 if (pred != p->old_node) {
777 exchange(p->new_node, res);
781 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
782 exchange(p->old_node, p->new_node);
784 } /* eliminate_nodes */
787 * Argh: Endless loops cause problems, because the
788 * insert algorithm did not terminate. We get translated nodes that
789 * references the origin. These nodes are translated again and again...
791 * The current fix is to use post-dominance. This simple ignores
792 * endless loops, i.e. we cannot optimize them.
794 void do_gvn_pre(ir_graph *irg)
798 optimization_state_t state;
800 unsigned antic_iter, insert_iter;
801 ir_node *value, *expr;
803 /* register a debug mask */
804 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
806 /* edges will crash if enabled due to our allocate on other obstack trick */
807 edges_deactivate(irg);
809 value_table = new_identities();
810 ir_nodemap_init(&value_map);
815 a_env.start_block = get_irg_start_block(irg);
816 a_env.end_block = get_irg_end_block(irg);
819 /* Move Proj's into the same block as their args,
820 else we would assign the result to wrong blocks */
821 normalize_proj_nodes(irg);
823 /* critical edges MUST be removed */
824 remove_critical_cf_edges(irg);
826 /* we need dominator for Antic_out calculation */
828 assure_postdoms(irg);
829 /* we get all nodes of a block by following outs */
830 assure_irg_outs(irg);
833 * Switch on GCSE. We need it to correctly compute
834 * the value of a node, which is independent from
837 save_optimization_state(&state);
838 set_opt_global_cse(1);
840 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
842 /* allocate block info for all blocks */
843 irg_walk_blkwise_dom_top_down(irg, NULL, topo_walker, &a_env);
845 /* clean the exp_gen set. Doing this here saves the cleanup in the iteration. */
846 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
847 ir_valueset_iterator_t iter;
849 foreach_valueset(bl_info->exp_gen, value, expr, iter) {
850 if (!is_clean_in_block(expr, bl_info->block, bl_info->exp_gen))
851 ir_valueset_remove_iterator(bl_info->exp_gen, &iter);
854 /* compute the available value sets for all blocks */
855 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
857 /* compute the anticipated value sets for all blocks */
859 a_env.first_iter = 1;
861 /* we use the visited flag to mark non-clean nodes */
862 inc_irg_visited(irg);
864 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
866 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
867 a_env.first_iter = 0;
868 DB((dbg, LEVEL_1, "------------------------\n"));
869 } while (a_env.changes != 0);
871 /* compute redundant expressions */
873 a_env.last_idx = get_irg_last_idx(irg);
875 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
877 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
878 DB((dbg, LEVEL_1, "------------------------\n"));
879 } while (a_env.changes != 0);
881 /* last step: eliminate nodes */
882 irg_walk_graph(irg, NULL, eliminate, &a_env);
883 eliminate_nodes(a_env.pairs);
885 /* clean up: delete all sets */
886 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
887 ir_valueset_del(bl_info->exp_gen);
888 ir_valueset_del(bl_info->avail_out);
889 ir_valueset_del(bl_info->antic_in);
890 if (bl_info->new_set)
891 ir_valueset_del(bl_info->new_set);
893 del_identities(value_table);
894 ir_nodemap_destroy(&value_map);
895 obstack_free(&obst, NULL);
898 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
899 set_irg_pinned(irg, op_pin_state_pinned);
900 restore_optimization_state(&state);
903 set_irg_outs_inconsistent(irg);
904 set_irg_loopinfo_inconsistent(irg);
908 /* Creates an ir_graph pass for do_gvn_pre. */
909 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
911 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
912 } /* do_gvn_pre_pass */