2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
30 #include "iroptimize.h"
39 #include "irnodemap.h"
40 #include "irnodeset.h"
42 #include "iropt_dbg.h"
46 #include "irgraph_t.h"
50 /** Additional info we need for every block. */
51 typedef struct block_info {
52 ir_valueset_t *exp_gen; /**< The set of expression per block. */
53 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
54 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
55 ir_valueset_t *new_set; /**< The set of all new values for a block. */
56 ir_node *avail; /**< The get_map(avail, block) result. */
57 ir_node *block; /**< The Block of the block info. */
58 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
59 int not_found; /**< Non-zero, if avail was not found in this block. */
63 * A pair of nodes that must be exchanged.
64 * We must defer the exchange because our hash-sets cannot
65 * find an already replace node else.
67 typedef struct elim_pair {
68 ir_node *old_node; /**< The old node that will be replaced. */
69 ir_node *new_node; /**< The new node. */
70 struct elim_pair *next; /**< Links all entries in a list. */
71 int reason; /**< The reason for the replacement. */
74 /** The environment for the GVN-PRE algorithm */
75 typedef struct pre_env {
76 struct obstack *obst; /**< The obstack to allocate on. */
77 ir_node *start_block; /**< The start block of the current graph. */
78 ir_node *end_block; /**< The end block of the current graph */
79 block_info *list; /**< Links all block info entries for easier recovery. */
80 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
81 unsigned last_idx; /**< last node index of "old" nodes, all higher indexes are newly created once. */
82 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
83 char first_iter; /**< non-zero for first iteration */
86 static pset *value_table;
87 static ir_nodemap_t value_map;
89 /** The debug module handle. */
90 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
92 /* ---------- Functions for Value sets ---------- */
94 /** computes dst = dst \/ src for value sets */
95 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
97 ir_valueset_iterator_t iter;
98 ir_node *value, *expr;
100 foreach_valueset(src, value, expr, iter) {
101 ir_valueset_insert(dst, value, expr);
106 /* ---------- Functions for Values ---------- */
109 * Add a node e representing the value v to the set.
111 * @param e a node representing an expression
112 * @param v a node representing a value
114 * @return the final value for the expression e
116 static ir_node *add(ir_node *e, ir_node *v)
119 ir_node *pred = get_Proj_pred(v);
120 ir_node *v_pred = identify_remember(value_table, pred);
122 if (v_pred != pred) {
123 /* must create a new value here */
124 v = new_r_Proj(v_pred, get_irn_mode(v), get_Proj_proj(v));
127 v = identify_remember(value_table, v);
128 ir_nodemap_insert(&value_map, e, v);
133 * Lookup a value in a value set.
135 * @param e a node representing an expression
137 * @return a node representing the value or NULL if
138 * the given expression is not available
140 static ir_node *lookup(ir_node *e)
142 ir_node *value = ir_nodemap_get(&value_map, e);
144 return identify_remember(value_table, value);
149 * Return the block info of a block.
151 * @param block the block
153 static block_info *get_block_info(ir_node *block)
155 return get_irn_link(block);
156 } /* get_block_info */
159 * Allocate a block info for a block.
161 * @param block the block
162 * @param env the environment
164 static void alloc_blk_info(ir_node *block, pre_env *env)
166 block_info *info = OALLOC(env->obst, block_info);
168 set_irn_link(block, info);
169 info->exp_gen = ir_valueset_new(16);
170 info->avail_out = ir_valueset_new(16);
171 info->antic_in = ir_valueset_new(16);
172 info->new_set = NULL;
175 info->next = env->list;
178 } /* alloc_blk_info */
181 * Returns non-zero if a node is movable and a possible candidate for PRE.
185 static int is_nice_value(ir_node *n)
190 n = get_Proj_pred(n);
191 if (get_irn_pinned(n) == op_pin_state_pinned)
193 mode = get_irn_mode(n);
194 if (!mode_is_data(mode)) {
195 if (! is_Div(n) && ! is_Mod(n) && ! is_DivMod(n))
197 if (! is_NoMem(get_fragile_op_mem(n)))
201 } /* is_nice_value */
207 * @param set the set to dump
208 * @param txt a text to describe the set
209 * @param block the owner block of the set
211 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block)
213 ir_valueset_iterator_t iter;
214 ir_node *value, *expr;
217 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
219 foreach_valueset(set, value, expr, iter) {
221 DB((dbg, LEVEL_2, "\n"));
223 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
225 DB((dbg, LEVEL_2, " %+F,", expr));
228 DB((dbg, LEVEL_2, "\n}\n"));
229 } /* dump_value_set */
232 #define dump_value_set(set, txt, block)
233 #endif /* DEBUG_libfirm */
236 * Topological walker. Allocates block info for every block and place nodes in topological
237 * order into the nodes set.
239 static void topo_walker(ir_node *irn, void *ctx)
247 /* the topological walker ensures that blocks are visited before anything else */
248 alloc_blk_info(irn, env);
251 /* GVN step: remember the value */
252 value = add(irn, irn);
254 /* no need to put constants into the sets: they are always redundant */
255 if (! is_nice_value(irn) || is_irn_constlike(irn))
258 /* Do not put mode_T nodes info the sets, or PhiT will be created
259 (which are not allowed in Firm). Instead, put the Proj's here only. */
260 if (get_irn_mode(irn) == mode_T)
263 /* place this node into the set of possible nodes of its block */
264 block = get_nodes_block(irn);
265 info = get_block_info(block);
267 ir_valueset_insert(info->exp_gen, value, irn);
271 * Computes Avail_out(block):
273 * Avail_in(block) = Avail_out(dom(block))
274 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
277 * This function must be called in the top-down dominance order:
278 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
280 * @param block the block
281 * @param ctx walker context
283 static void compute_avail_top_down(ir_node *block, void *ctx)
286 block_info *dom_info;
287 block_info *info = get_block_info(block);
290 /* we don't need the end block Avail */
291 if (block == env->end_block)
295 * First add all nodes from the dominator.
296 * This must be done to ensure that Antic_out contains the leader
297 * for every node. The root has no dominator.
299 if (block != env->start_block) {
300 dom_blk = get_Block_idom(block);
301 assert(is_Block(dom_blk));
303 dom_info = get_block_info(dom_blk);
306 value_union(info->avail_out, dom_info->avail_out);
308 value_union(info->avail_out, info->exp_gen);
310 dump_value_set(info->avail_out, "Avail_out", block);
311 } /* compute_avail_top_down */
314 * check if a node n is clean in block block.
317 * @param block the block
318 * @param set a value set, containing the already processed predecessors
320 static int is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *set)
327 if (! is_nice_value(n))
330 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
331 ir_node *pred = get_irn_n(n, i);
334 if (get_nodes_block(pred) != block)
340 if (! is_nice_value(pred))
343 value = lookup(pred);
346 if (! ir_valueset_lookup(set, value))
350 } /* is_clean_in_block */
353 * Implements phi_translate. Translate a expression above a Phi.
355 * @param node the node
356 * @param block the block in which the node is translated
357 * @param pos the input number of the destination block
358 * @param translated the valueset containing the other already translated nodes
360 * @return a node representing the translated value
362 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *translated)
368 if (get_nodes_block(node) == block) {
369 /* a Phi inside our block */
370 return get_Phi_pred(node, pos);
372 /* already outside */
376 arity = get_irn_arity(node);
378 /* check if the node has at least one Phi predecessor */
379 for (i = 0; i < arity; ++i) {
380 ir_node *pred = get_irn_n(node, i);
381 ir_node *leader = lookup(pred);
384 leader = leader != NULL ? leader : pred;
385 trans = ir_valueset_lookup(translated, leader);
387 if ((trans != NULL && trans != leader) || (is_Phi(leader) && get_nodes_block(leader) == block))
391 /* no translation needed */
396 get_irn_dbg_info(node),
403 /* We need the attribute copy here, because the Hash value of a
404 node might depend on that. */
405 copy_node_attr(current_ir_graph, node, nn);
407 set_nodes_block(nn, get_nodes_block(node));
408 for (i = 0; i < arity; ++i) {
409 ir_node *pred = get_irn_n(node, i);
410 ir_node *leader = lookup(pred);
413 leader = leader != NULL ? leader : pred;
414 trans = ir_valueset_lookup(translated, leader);
418 if (is_Phi(trans) && get_nodes_block(trans) == block)
419 set_irn_n(nn, i, get_Phi_pred(trans, pos));
421 set_irn_n(nn, i, trans);
423 nn = optimize_node(nn);
425 } /* phi_translate */
428 * Block-walker, computes Antic_in(block).
430 * @param block the block
431 * @param ctx the walker environment
433 static void compute_antic(ir_node *block, void *ctx)
436 block_info *succ_info;
437 block_info *info = get_block_info(block);
438 ir_node *succ, *value, *expr;
440 ir_valueset_iterator_t iter;
442 /* no need for computations in start block */
443 if (block == env->start_block)
446 size = ir_valueset_size(info->antic_in);
448 /* the end block has no successor */
449 if (block != env->end_block) {
453 * This step puts all generated expression from the current
454 * current block into Antic_in.
455 * It is enough to do this in the first iteration only, because
456 * the set info->exp_gen is not changed anymore.
458 if (env->first_iter) {
459 foreach_valueset(info->exp_gen, value, expr, iter) {
460 ir_valueset_insert(info->antic_in, value, expr);
464 n_succ = get_Block_n_cfg_outs(block);
468 /* find blocks position in succ's block predecessors */
469 succ = get_Block_cfg_out(block, 0);
470 pos = get_Block_cfgpred_pos(succ, block);
473 succ_info = get_block_info(succ);
474 /* translate into list: we cannot insert into a set we iterate
475 * and succ might be equal to block for endless loops */
476 foreach_valueset(succ_info->antic_in, value, expr, iter) {
477 ir_node *trans = phi_translate(expr, succ, pos, info->antic_in);
479 if (is_clean_in_block(trans, block, info->antic_in))
480 ir_valueset_insert(info->antic_in, value, trans);
482 } else { /* n_succ > 1 */
484 block_info *succ0_info;
489 /* Select a successor to compute the disjoint of all Nodes
490 sets, it might be useful to select the block with the
491 smallest number of nodes. For simplicity we choose the
493 succ0 = get_Block_cfg_out(block, 0);
494 succ0_info = get_block_info(succ0);
495 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
496 /* we need the disjoint */
497 for (i = 1; i < n_succ; ++i) {
498 ir_node *succ = get_Block_cfg_out(block, i);
499 block_info *succ_info = get_block_info(succ);
500 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
504 /* we found a value that is common in all Antic_in(succ(b)),
505 put it in Antic_in(b) if the value is NOT already represented. */
506 if (is_clean_in_block(expr, block, info->antic_in))
507 ir_valueset_insert(info->antic_in, value, expr);
513 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
514 * and all other sets */
516 dump_value_set(info->antic_in, "Antic_in", block);
517 if (size != ir_valueset_size(info->antic_in)) {
518 /* the Antic_in set has changed */
521 } /* compute_antic */
524 * Perform insertion of partially redundant values.
525 * For every Block node, do the following:
526 * 1. Propagate the NEW_SETS of the dominator into the current block.
527 * If the block has multiple predecessors,
528 * 2a. Iterate over the ANTIC expressions for the block to see if
529 * any of them are partially redundant.
530 * 2b. If so, insert them into the necessary predecessors to make
531 * the expression fully redundant.
532 * 2c. Insert a new Phi merging the values of the predecessors.
533 * 2d. Insert the new Phi, and the new expressions, into the
536 * @param block the block
537 * @param ctx the walker environment
539 static void insert_nodes(ir_node *block, void *ctx)
542 ir_node *value, *expr, *idom, *first_s, *worklist;
543 block_info *curr_info, *idom_info;
544 int pos, arity = get_irn_arity(block);
545 int all_same, by_some, updated;
546 ir_valueset_iterator_t iter;
548 /* ensure that even the start block has a new_set */
549 curr_info = get_block_info(block);
550 if (curr_info->new_set)
551 ir_valueset_del(curr_info->new_set);
552 curr_info->new_set = ir_valueset_new(16);
554 if (block == env->start_block)
557 idom = get_Block_idom(block);
558 idom_info = get_block_info(idom);
560 /* update the new_sets */
562 dump_value_set(idom_info->new_set, "[New Set]", idom);
563 foreach_valueset(idom_info->new_set, value, expr, iter) {
564 ir_valueset_insert(curr_info->new_set, value, expr);
565 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
568 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
574 /* convert the set into a list. This allows the removal of
575 * elements from the set */
577 foreach_valueset(curr_info->antic_in, value, expr, iter) {
580 /* If the value was already computed in the dominator, then
581 it is totally redundant. Hence we have nothing to insert. */
582 if (ir_valueset_lookup(idom_info->avail_out, value)) {
583 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
592 /* for all predecessor blocks */
593 for (pos = 0; pos < arity; ++pos) {
594 block_info *pred_info;
595 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
596 ir_node *e_prime, *v_prime, *e_dprime;
598 /* ignore bad blocks. */
599 if (is_Bad(pred_blk))
602 e_prime = phi_translate(expr, block, pos, curr_info->avail_out);
603 v_prime = lookup(e_prime);
607 pred_info = get_block_info(pred_blk);
608 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
610 if (e_dprime == NULL) {
611 pred_info->avail = e_prime;
612 pred_info->not_found = 1;
615 pred_info->avail = e_dprime;
616 pred_info->not_found = 0;
617 mode = get_irn_mode(e_dprime);
622 else if (first_s != e_dprime)
625 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
629 /* If it's not the same value already existing along every predecessor, and
630 it's defined by some predecessor, it is partially redundant. */
631 if (! all_same && by_some) {
632 ir_node *phi, *l, **in;
634 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
636 in = XMALLOCN(ir_node*, arity);
637 /* for all predecessor blocks */
638 for (pos = 0; pos < arity; ++pos) {
639 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
640 block_info *pred_info = get_block_info(pred_blk);
642 /* ignore bad blocks. */
643 if (is_Bad(pred_blk)) {
648 /* ignore blocks that already have the expression */
649 if (pred_info->not_found) {
650 ir_node *e_prime = pred_info->avail;
652 if (!is_Phi(e_prime)) {
653 ir_node *proj_pred = NULL;
654 if (is_Proj(e_prime)) {
655 ir_node *pred = get_Proj_pred(e_prime);
656 mode = get_irn_mode(pred);
658 get_irn_dbg_info(pred),
659 current_ir_graph, pred_blk,
663 get_irn_in(pred) + 1);
664 copy_node_attr(current_ir_graph, pred, nn);
666 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
669 mode = get_irn_mode(e_prime);
671 get_irn_dbg_info(e_prime),
672 current_ir_graph, pred_blk,
675 get_irn_arity(e_prime),
676 get_irn_in(e_prime) + 1);
677 copy_node_attr(current_ir_graph, e_prime, nn);
678 if (proj_pred != NULL) {
679 set_Proj_pred(nn, proj_pred);
682 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
685 l = add(expr, value);
687 ir_valueset_insert(pred_info->avail_out, add(nn, l), nn);
688 pred_info->avail = nn;
691 in[pos] = pred_info->avail;
693 phi = new_r_Phi(block, arity, in, mode);
696 l = add(expr, value);
699 ir_valueset_replace(curr_info->avail_out, value, phi);
700 ir_valueset_insert(curr_info->new_set, value, phi);
702 DB((dbg, LEVEL_1, "New %+F for redundant %+F created\n", phi, expr));
703 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
706 } /* node_set_foreach */
710 * Walker, change nodes by its value if different.
712 * We cannot do the changes right here, as this would change
713 * the hash values of the nodes in the avail_out set!
715 * @param irn the node
716 * @param ctx the walker environment
718 static void eliminate(ir_node *irn, void *ctx)
722 if (!is_Block(irn)) {
723 ir_node *block = get_nodes_block(irn);
724 block_info *bl = get_block_info(block);
725 ir_node *value = lookup(irn);
728 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
730 if (expr != NULL && expr != irn) {
731 elim_pair *p = OALLOC(env->obst, elim_pair);
735 p->next = env->pairs;
736 p->reason = get_irn_idx(expr) >= env->last_idx ? FS_OPT_GVN_PARTLY : FS_OPT_GVN_FULLY;
744 * Do all the recorded changes and optimize
745 * newly created Phi's.
747 * @param pairs list of elimination pairs
749 static void eliminate_nodes(elim_pair *pairs)
753 for (p = pairs; p != NULL; p = p->next) {
754 /* might be already changed */
755 p->new_node = skip_Id(p->new_node);
757 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
759 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
760 * which we can optimize here
762 if (is_Phi(p->new_node)) {
766 for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
767 ir_node *pred = get_irn_n(p->new_node, i);
769 if (pred != p->old_node) {
778 exchange(p->new_node, res);
782 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
783 exchange(p->old_node, p->new_node);
785 } /* eliminate_nodes */
788 * Argh: Endless loops cause problems, because the
789 * insert algorithm did not terminate. We get translated nodes that
790 * references the origin. These nodes are translated again and again...
792 * The current fix is to use post-dominance. This simple ignores
793 * endless loops, i.e. we cannot optimize them.
795 void do_gvn_pre(ir_graph *irg)
799 optimization_state_t state;
801 unsigned antic_iter, insert_iter;
802 ir_node *value, *expr;
804 /* register a debug mask */
805 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
807 /* edges will crash if enabled due to our allocate on other obstack trick */
808 edges_deactivate(irg);
810 value_table = new_identities();
811 ir_nodemap_init(&value_map);
816 a_env.start_block = get_irg_start_block(irg);
817 a_env.end_block = get_irg_end_block(irg);
820 /* Move Proj's into the same block as their args,
821 else we would assign the result to wrong blocks */
822 normalize_proj_nodes(irg);
824 /* critical edges MUST be removed */
825 remove_critical_cf_edges(irg);
827 /* we need dominator for Antic_out calculation */
829 assure_postdoms(irg);
830 /* we get all nodes of a block by following outs */
831 assure_irg_outs(irg);
834 * Switch on GCSE. We need it to correctly compute
835 * the value of a node, which is independent from
838 save_optimization_state(&state);
839 set_opt_global_cse(1);
841 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
843 /* allocate block info for all blocks */
844 irg_walk_blkwise_dom_top_down(irg, NULL, topo_walker, &a_env);
846 /* clean the exp_gen set. Doing this here saves the cleanup in the iteration. */
847 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
848 ir_valueset_iterator_t iter;
850 foreach_valueset(bl_info->exp_gen, value, expr, iter) {
851 if (!is_clean_in_block(expr, bl_info->block, bl_info->exp_gen))
852 ir_valueset_remove_iterator(bl_info->exp_gen, &iter);
855 /* compute the available value sets for all blocks */
856 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
858 /* compute the anticipated value sets for all blocks */
860 a_env.first_iter = 1;
862 /* we use the visited flag to mark non-clean nodes */
863 inc_irg_visited(irg);
865 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
867 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
868 a_env.first_iter = 0;
869 DB((dbg, LEVEL_1, "------------------------\n"));
870 } while (a_env.changes != 0);
872 /* compute redundant expressions */
874 a_env.last_idx = get_irg_last_idx(irg);
876 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
878 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
879 DB((dbg, LEVEL_1, "------------------------\n"));
880 } while (a_env.changes != 0);
882 /* last step: eliminate nodes */
883 irg_walk_graph(irg, NULL, eliminate, &a_env);
884 eliminate_nodes(a_env.pairs);
886 /* clean up: delete all sets */
887 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
888 ir_valueset_del(bl_info->exp_gen);
889 ir_valueset_del(bl_info->avail_out);
890 ir_valueset_del(bl_info->antic_in);
891 if (bl_info->new_set)
892 ir_valueset_del(bl_info->new_set);
894 del_identities(value_table);
895 ir_nodemap_destroy(&value_map);
896 obstack_free(&obst, NULL);
899 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
900 set_irg_pinned(irg, op_pin_state_pinned);
901 restore_optimization_state(&state);
904 set_irg_outs_inconsistent(irg);
905 set_irg_loopinfo_inconsistent(irg);
909 /* Creates an ir_graph pass for do_gvn_pre. */
910 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
912 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
913 } /* do_gvn_pre_pass */