2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
29 #include "iroptimize.h"
38 #include "irnodehashmap.h"
39 #include "irnodeset.h"
41 #include "iropt_dbg.h"
45 #include "irgraph_t.h"
49 /** Additional info we need for every block. */
50 typedef struct block_info {
51 ir_valueset_t *exp_gen; /**< The set of expression per block. */
52 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
53 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
54 ir_valueset_t *new_set; /**< The set of all new values for a block. */
55 ir_node *avail; /**< The get_map(avail, block) result. */
56 ir_node *block; /**< The Block of the block info. */
57 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
58 int not_found; /**< Non-zero, if avail was not found in this block. */
62 * A pair of nodes that must be exchanged.
63 * We must defer the exchange because our hash-sets cannot
64 * find an already replace node else.
66 typedef struct elim_pair {
67 ir_node *old_node; /**< The old node that will be replaced. */
68 ir_node *new_node; /**< The new node. */
69 struct elim_pair *next; /**< Links all entries in a list. */
70 int reason; /**< The reason for the replacement. */
73 /** The environment for the GVN-PRE algorithm */
74 typedef struct pre_env {
75 struct obstack *obst; /**< The obstack to allocate on. */
76 ir_node *start_block; /**< The start block of the current graph. */
77 ir_node *end_block; /**< The end block of the current graph */
78 block_info *list; /**< Links all block info entries for easier recovery. */
79 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
80 unsigned last_idx; /**< last node index of "old" nodes, all higher indexes are newly created once. */
81 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
82 char first_iter; /**< non-zero for first iteration */
85 static ir_nodehashmap_t value_map;
87 /** The debug module handle. */
88 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
90 /* ---------- Functions for Value sets ---------- */
92 /** computes dst = dst \/ src for value sets */
93 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
95 ir_valueset_iterator_t iter;
96 ir_node *value, *expr;
98 foreach_valueset(src, value, expr, iter) {
99 ir_valueset_insert(dst, value, expr);
104 /* ---------- Functions for Values ---------- */
107 * Add a node e representing the value v to the set.
109 * @param e a node representing an expression
110 * @param v a node representing a value
112 * @return the final value for the expression e
114 static ir_node *add(ir_node *e, ir_node *v)
117 ir_node *pred = get_Proj_pred(v);
118 ir_node *v_pred = identify_remember(pred);
120 if (v_pred != pred) {
121 /* must create a new value here */
122 v = new_r_Proj(v_pred, get_irn_mode(v), get_Proj_proj(v));
125 v = identify_remember(v);
126 ir_nodehashmap_insert(&value_map, e, v);
131 * Lookup a value in a value set.
133 * @param e a node representing an expression
135 * @return a node representing the value or NULL if
136 * the given expression is not available
138 static ir_node *lookup(ir_node *e)
140 ir_node *value = (ir_node*)ir_nodehashmap_get(&value_map, e);
142 return identify_remember(value);
147 * Return the block info of a block.
149 * @param block the block
151 static block_info *get_block_info(ir_node *block)
153 return (block_info*)get_irn_link(block);
157 * Allocate a block info for a block.
159 * @param block the block
160 * @param env the environment
162 static void alloc_blk_info(ir_node *block, pre_env *env)
164 block_info *info = OALLOC(env->obst, block_info);
166 set_irn_link(block, info);
167 info->exp_gen = ir_valueset_new(16);
168 info->avail_out = ir_valueset_new(16);
169 info->antic_in = ir_valueset_new(16);
170 info->new_set = NULL;
173 info->next = env->list;
176 } /* alloc_blk_info */
179 * Returns non-zero if a node is movable and a possible candidate for PRE.
183 static int is_nice_value(ir_node *n)
188 n = get_Proj_pred(n);
189 if (get_irn_pinned(n) == op_pin_state_pinned)
191 mode = get_irn_mode(n);
192 if (!mode_is_data(mode)) {
193 if (! is_Div(n) && ! is_Mod(n))
195 if (! is_NoMem(get_fragile_op_mem(n)))
199 } /* is_nice_value */
205 * @param set the set to dump
206 * @param txt a text to describe the set
207 * @param block the owner block of the set
209 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
211 ir_valueset_iterator_t iter;
212 ir_node *value, *expr;
215 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
217 foreach_valueset(set, value, expr, iter) {
219 DB((dbg, LEVEL_2, "\n"));
221 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
223 DB((dbg, LEVEL_2, " %+F,", expr));
226 DB((dbg, LEVEL_2, "\n}\n"));
227 } /* dump_value_set */
230 #define dump_value_set(set, txt, block)
231 #endif /* DEBUG_libfirm */
234 * Topological walker. Allocates block info for every block and place nodes in
235 * topological order into the nodes set.
237 static void topo_walker(ir_node *irn, void *ctx)
239 pre_env *env = (pre_env*)ctx;
245 /* the topological walker ensures that blocks are visited before anything else */
246 alloc_blk_info(irn, env);
249 /* GVN step: remember the value */
250 value = add(irn, irn);
252 /* no need to put constants into the sets: they are always redundant */
253 if (! is_nice_value(irn) || is_irn_constlike(irn))
256 /* Do not put mode_T nodes info the sets, or PhiT will be created
257 (which are not allowed in Firm). Instead, put the Proj's here only. */
258 if (get_irn_mode(irn) == mode_T)
261 /* place this node into the set of possible nodes of its block */
262 block = get_nodes_block(irn);
263 info = get_block_info(block);
265 ir_valueset_insert(info->exp_gen, value, irn);
269 * Computes Avail_out(block):
271 * Avail_in(block) = Avail_out(dom(block))
272 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
275 * This function must be called in the top-down dominance order:
276 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
278 * @param block the block
279 * @param ctx walker context
281 static void compute_avail_top_down(ir_node *block, void *ctx)
283 pre_env *env = (pre_env*)ctx;
284 block_info *dom_info;
285 block_info *info = get_block_info(block);
288 /* we don't need the end block Avail */
289 if (block == env->end_block)
293 * First add all nodes from the dominator.
294 * This must be done to ensure that Antic_out contains the leader
295 * for every node. The root has no dominator.
297 if (block != env->start_block) {
298 dom_blk = get_Block_idom(block);
299 assert(is_Block(dom_blk));
301 dom_info = get_block_info(dom_blk);
304 value_union(info->avail_out, dom_info->avail_out);
306 value_union(info->avail_out, info->exp_gen);
308 dump_value_set(info->avail_out, "Avail_out", block);
312 * check if a node n is clean in block block.
315 * @param block the block
316 * @param set a value set, containing the already processed predecessors
318 static int is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *set)
325 if (! is_nice_value(n))
328 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
329 ir_node *pred = get_irn_n(n, i);
332 if (get_nodes_block(pred) != block)
338 if (! is_nice_value(pred))
341 value = lookup(pred);
344 if (! ir_valueset_lookup(set, value))
348 } /* is_clean_in_block */
351 * Implements phi_translate. Translate a expression above a Phi.
353 * @param node the node
354 * @param block the block in which the node is translated
355 * @param pos the input number of the destination block
356 * @param translated the valueset containing the other already translated nodes
358 * @return a node representing the translated value
360 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *translated)
366 if (get_nodes_block(node) == block) {
367 /* a Phi inside our block */
368 return get_Phi_pred(node, pos);
370 /* already outside */
374 arity = get_irn_arity(node);
376 /* check if the node has at least one Phi predecessor */
377 for (i = 0; i < arity; ++i) {
378 ir_node *pred = get_irn_n(node, i);
379 ir_node *leader = lookup(pred);
382 leader = leader != NULL ? leader : pred;
383 trans = (ir_node*)ir_valueset_lookup(translated, leader);
385 if ((trans != NULL && trans != leader) || (is_Phi(leader) && get_nodes_block(leader) == block))
389 /* no translation needed */
394 get_irn_dbg_info(node),
396 get_nodes_block(node),
401 /* We need the attribute copy here, because the Hash value of a
402 node might depend on that. */
403 copy_node_attr(current_ir_graph, node, nn);
405 for (i = 0; i < arity; ++i) {
406 ir_node *pred = get_irn_n(node, i);
407 ir_node *leader = lookup(pred);
410 leader = leader != NULL ? leader : pred;
411 trans = (ir_node*)ir_valueset_lookup(translated, leader);
415 if (is_Phi(trans) && get_nodes_block(trans) == block)
416 set_irn_n(nn, i, get_Phi_pred(trans, pos));
418 set_irn_n(nn, i, trans);
420 nn = optimize_node(nn);
422 } /* phi_translate */
425 * Block-walker, computes Antic_in(block).
427 * @param block the block
428 * @param ctx the walker environment
430 static void compute_antic(ir_node *block, void *ctx)
432 pre_env *env = (pre_env*)ctx;
433 block_info *succ_info;
434 block_info *info = get_block_info(block);
435 ir_node *succ, *value, *expr;
437 ir_valueset_iterator_t iter;
439 /* no need for computations in start block */
440 if (block == env->start_block)
443 size = ir_valueset_size(info->antic_in);
445 /* the end block has no successor */
446 if (block != env->end_block) {
450 * This step puts all generated expression from the current
451 * current block into Antic_in.
452 * It is enough to do this in the first iteration only, because
453 * the set info->exp_gen is not changed anymore.
455 if (env->first_iter) {
456 foreach_valueset(info->exp_gen, value, expr, iter) {
457 ir_valueset_insert(info->antic_in, value, expr);
461 n_succ = get_Block_n_cfg_outs(block);
465 /* find blocks position in succ's block predecessors */
466 succ = get_Block_cfg_out(block, 0);
467 pos = get_Block_cfgpred_pos(succ, block);
470 succ_info = get_block_info(succ);
471 /* translate into list: we cannot insert into a set we iterate
472 * and succ might be equal to block for endless loops */
473 foreach_valueset(succ_info->antic_in, value, expr, iter) {
474 ir_node *trans = phi_translate(expr, succ, pos, info->antic_in);
476 if (is_clean_in_block(trans, block, info->antic_in))
477 ir_valueset_insert(info->antic_in, value, trans);
479 } else { /* n_succ > 1 */
481 block_info *succ0_info;
486 /* Select a successor to compute the disjoint of all Nodes
487 sets, it might be useful to select the block with the
488 smallest number of nodes. For simplicity we choose the
490 succ0 = get_Block_cfg_out(block, 0);
491 succ0_info = get_block_info(succ0);
492 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
493 /* we need the disjoint */
494 for (i = 1; i < n_succ; ++i) {
495 ir_node *succ = get_Block_cfg_out(block, i);
496 block_info *succ_info = get_block_info(succ);
497 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
501 /* we found a value that is common in all Antic_in(succ(b)),
502 put it in Antic_in(b) if the value is NOT already represented. */
503 if (is_clean_in_block(expr, block, info->antic_in))
504 ir_valueset_insert(info->antic_in, value, expr);
510 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
511 * and all other sets */
513 dump_value_set(info->antic_in, "Antic_in", block);
514 if (size != ir_valueset_size(info->antic_in)) {
515 /* the Antic_in set has changed */
518 } /* compute_antic */
521 * Perform insertion of partially redundant values.
522 * For every Block node, do the following:
523 * 1. Propagate the NEW_SETS of the dominator into the current block.
524 * If the block has multiple predecessors,
525 * 2a. Iterate over the ANTIC expressions for the block to see if
526 * any of them are partially redundant.
527 * 2b. If so, insert them into the necessary predecessors to make
528 * the expression fully redundant.
529 * 2c. Insert a new Phi merging the values of the predecessors.
530 * 2d. Insert the new Phi, and the new expressions, into the
533 * @param block the block
534 * @param ctx the walker environment
536 static void insert_nodes(ir_node *block, void *ctx)
538 pre_env *env = (pre_env*)ctx;
539 ir_node *value, *expr, *idom, *first_s;
540 block_info *curr_info, *idom_info;
541 int pos, arity = get_irn_arity(block);
542 int all_same, by_some, updated;
543 ir_valueset_iterator_t iter;
545 /* ensure that even the start block has a new_set */
546 curr_info = get_block_info(block);
547 if (curr_info->new_set)
548 ir_valueset_del(curr_info->new_set);
549 curr_info->new_set = ir_valueset_new(16);
551 if (block == env->start_block)
554 idom = get_Block_idom(block);
555 idom_info = get_block_info(idom);
557 /* update the new_sets */
559 dump_value_set(idom_info->new_set, "[New Set]", idom);
560 foreach_valueset(idom_info->new_set, value, expr, iter) {
561 ir_valueset_insert(curr_info->new_set, value, expr);
562 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
565 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
571 /* convert the set into a list. This allows the removal of
572 * elements from the set */
573 foreach_valueset(curr_info->antic_in, value, expr, iter) {
576 /* If the value was already computed in the dominator, then
577 it is totally redundant. Hence we have nothing to insert. */
578 if (ir_valueset_lookup(idom_info->avail_out, value)) {
579 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
588 /* for all predecessor blocks */
589 for (pos = 0; pos < arity; ++pos) {
590 block_info *pred_info;
591 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
592 ir_node *e_prime, *v_prime, *e_dprime;
594 /* ignore bad blocks. */
595 if (is_Bad(pred_blk))
598 e_prime = phi_translate(expr, block, pos, curr_info->avail_out);
599 v_prime = lookup(e_prime);
603 pred_info = get_block_info(pred_blk);
604 e_dprime = (ir_node*)ir_valueset_lookup(pred_info->avail_out, v_prime);
606 if (e_dprime == NULL) {
607 pred_info->avail = e_prime;
608 pred_info->not_found = 1;
611 pred_info->avail = e_dprime;
612 pred_info->not_found = 0;
613 mode = get_irn_mode(e_dprime);
618 else if (first_s != e_dprime)
621 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
625 /* If it's not the same value already existing along every predecessor, and
626 it's defined by some predecessor, it is partially redundant. */
627 if (! all_same && by_some) {
628 ir_node *phi, *l, **in;
630 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
632 in = XMALLOCN(ir_node*, arity);
633 /* for all predecessor blocks */
634 for (pos = 0; pos < arity; ++pos) {
635 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
636 block_info *pred_info = get_block_info(pred_blk);
638 /* ignore bad blocks. */
639 if (is_Bad(pred_blk)) {
640 ir_graph *irg = get_irn_irg(pred_blk);
641 in[pos] = new_r_Bad(irg, mode_X);
645 /* ignore blocks that already have the expression */
646 if (pred_info->not_found) {
647 ir_node *e_prime = pred_info->avail;
649 if (!is_Phi(e_prime)) {
650 ir_node *proj_pred = NULL;
651 if (is_Proj(e_prime)) {
652 ir_node *pred = get_Proj_pred(e_prime);
653 mode = get_irn_mode(pred);
655 get_irn_dbg_info(pred),
656 current_ir_graph, pred_blk,
660 get_irn_in(pred) + 1);
661 copy_node_attr(current_ir_graph, pred, nn);
663 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
666 mode = get_irn_mode(e_prime);
668 get_irn_dbg_info(e_prime),
669 current_ir_graph, pred_blk,
672 get_irn_arity(e_prime),
673 get_irn_in(e_prime) + 1);
674 copy_node_attr(current_ir_graph, e_prime, nn);
675 if (proj_pred != NULL) {
676 set_Proj_pred(nn, proj_pred);
679 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
682 l = add(expr, value);
684 ir_valueset_insert(pred_info->avail_out, add(nn, l), nn);
685 pred_info->avail = nn;
688 in[pos] = pred_info->avail;
690 phi = new_r_Phi(block, arity, in, mode);
693 l = add(expr, value);
696 ir_valueset_replace(curr_info->avail_out, value, phi);
697 ir_valueset_insert(curr_info->new_set, value, phi);
699 DB((dbg, LEVEL_1, "New %+F for redundant %+F created\n", phi, expr));
700 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
703 } /* node_set_foreach */
707 * Walker, change nodes by its value if different.
709 * We cannot do the changes right here, as this would change
710 * the hash values of the nodes in the avail_out set!
712 * @param irn the node
713 * @param ctx the walker environment
715 static void eliminate(ir_node *irn, void *ctx)
717 pre_env *env = (pre_env*)ctx;
719 if (!is_Block(irn)) {
720 ir_node *block = get_nodes_block(irn);
721 block_info *bl = get_block_info(block);
722 ir_node *value = lookup(irn);
725 ir_node *expr = (ir_node*)ir_valueset_lookup(bl->avail_out, value);
727 if (expr != NULL && expr != irn) {
728 elim_pair *p = OALLOC(env->obst, elim_pair);
732 p->next = env->pairs;
733 p->reason = get_irn_idx(expr) >= env->last_idx ? FS_OPT_GVN_PARTLY : FS_OPT_GVN_FULLY;
741 * Do all the recorded changes and optimize
742 * newly created Phi's.
744 * @param pairs list of elimination pairs
746 static void eliminate_nodes(elim_pair *pairs)
750 for (p = pairs; p != NULL; p = p->next) {
751 /* might be already changed */
752 p->new_node = skip_Id(p->new_node);
754 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
756 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
757 * which we can optimize here
759 if (is_Phi(p->new_node)) {
763 for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
764 ir_node *pred = get_irn_n(p->new_node, i);
766 if (pred != p->old_node) {
775 exchange(p->new_node, res);
779 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
780 exchange(p->old_node, p->new_node);
782 } /* eliminate_nodes */
785 * Argh: Endless loops cause problems, because the
786 * insert algorithm did not terminate. We get translated nodes that
787 * references the origin. These nodes are translated again and again...
789 * The current fix is to use post-dominance. This simple ignores
790 * endless loops, i.e. we cannot optimize them.
792 void do_gvn_pre(ir_graph *irg)
796 optimization_state_t state;
798 unsigned antic_iter, insert_iter;
799 ir_node *value, *expr;
801 /* register a debug mask */
802 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
804 /* edges will crash if enabled due to our allocate on other obstack trick */
805 edges_deactivate(irg);
808 ir_nodehashmap_init(&value_map);
813 a_env.start_block = get_irg_start_block(irg);
814 a_env.end_block = get_irg_end_block(irg);
817 /* critical edges MUST be removed */
818 remove_critical_cf_edges(irg);
820 /* we need dominator for Antic_out calculation */
822 assure_postdoms(irg);
823 /* we get all nodes of a block by following outs */
824 assure_irg_outs(irg);
827 * Switch on GCSE. We need it to correctly compute
828 * the value of a node, which is independent from
831 save_optimization_state(&state);
832 set_opt_global_cse(1);
834 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
836 /* allocate block info for all blocks */
837 irg_walk_blkwise_dom_top_down(irg, NULL, topo_walker, &a_env);
839 /* clean the exp_gen set. Doing this here saves the cleanup in the iteration. */
840 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
841 ir_valueset_iterator_t iter;
843 foreach_valueset(bl_info->exp_gen, value, expr, iter) {
844 if (!is_clean_in_block(expr, bl_info->block, bl_info->exp_gen))
845 ir_valueset_remove_iterator(bl_info->exp_gen, &iter);
848 /* compute the available value sets for all blocks */
849 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
851 /* compute the anticipated value sets for all blocks */
853 a_env.first_iter = 1;
855 /* we use the visited flag to mark non-clean nodes */
856 inc_irg_visited(irg);
858 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
860 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
861 a_env.first_iter = 0;
862 DB((dbg, LEVEL_1, "------------------------\n"));
863 } while (a_env.changes != 0);
865 /* compute redundant expressions */
867 a_env.last_idx = get_irg_last_idx(irg);
869 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
871 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
872 DB((dbg, LEVEL_1, "------------------------\n"));
873 } while (a_env.changes != 0);
875 /* last step: eliminate nodes */
876 irg_walk_graph(irg, NULL, eliminate, &a_env);
877 eliminate_nodes(a_env.pairs);
879 /* clean up: delete all sets */
880 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
881 ir_valueset_del(bl_info->exp_gen);
882 ir_valueset_del(bl_info->avail_out);
883 ir_valueset_del(bl_info->antic_in);
884 if (bl_info->new_set)
885 ir_valueset_del(bl_info->new_set);
887 ir_nodehashmap_destroy(&value_map);
888 obstack_free(&obst, NULL);
890 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
891 set_irg_pinned(irg, op_pin_state_pinned);
892 restore_optimization_state(&state);
895 /* Creates an ir_graph pass for do_gvn_pre. */
896 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
898 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
899 } /* do_gvn_pre_pass */