2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
38 #include "irnodemap.h"
39 #include "irnodeset.h"
41 #include "iropt_dbg.h"
44 #include "irgraph_t.h"
49 /** Additional info we need for every block. */
50 typedef struct block_info {
51 ir_valueset_t *exp_gen; /**< The set of expression per block. */
52 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
53 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
54 ir_valueset_t *new_set; /**< The set of all new values for a block. */
55 ir_node *avail; /**< The get_map(avail, block) result. */
56 ir_node *block; /**< The Block of the block info. */
57 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
58 int not_found; /**< Non-zero, if avail was not found in this block. */
62 * A pair of nodes that must be exchanged.
63 * We must defer the exchange because our hash-sets cannot
64 * find an already replace node else.
66 typedef struct elim_pair {
67 ir_node *old_node; /**< The old node that will be replaced. */
68 ir_node *new_node; /**< The new node. */
69 struct elim_pair *next; /**< Links all entries in a list. */
70 int reason; /**< The reason for the replacement. */
73 /** The environment for the GVN-PRE algorithm */
74 typedef struct pre_env {
75 struct obstack *obst; /**< The obstack to allocate on. */
76 ir_node *start_block; /**< The start block of the current graph. */
77 ir_node *end_block; /**< The end block of the current graph */
78 block_info *list; /**< Links all block info entires for easier recovery. */
79 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
80 unsigned last_idx; /**< last node index of "old" nodes, all higher indexes are newly created once. */
81 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
82 char first_iter; /**< non-zero for first iteration */
85 static pset *value_table;
86 static ir_nodemap_t value_map;
88 /** The debug module handle. */
89 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
91 /* ---------- Functions for Value sets ---------- */
93 /** computes dst = dst \/ src for value sets */
94 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
96 ir_valueset_iterator_t iter;
97 ir_node *value, *expr;
99 foreach_valueset(src, value, expr, iter) {
100 ir_valueset_insert(dst, value, expr);
105 /* ---------- Functions for Values ---------- */
108 * Add a node e representing the value v to the set.
110 * @param e a node representing an expression
111 * @param v a node representing a value
113 * @return the final value for the expression e
115 static ir_node *add(ir_node *e, ir_node *v)
118 ir_node *pred = get_Proj_pred(v);
119 ir_node *v_pred = identify_remember(value_table, pred);
121 if (v_pred != pred) {
122 /* must create a new value here */
123 v = new_r_Proj(get_nodes_block(v_pred), v_pred, get_irn_mode(v), get_Proj_proj(v));
126 v = identify_remember(value_table, v);
127 ir_nodemap_insert(&value_map, e, v);
132 * Lookup a value in a value set.
134 * @param e a node representing an expression
136 * @return a node representing the value or NULL if
137 * the given expression is not available
139 static ir_node *lookup(ir_node *e)
141 ir_node *value = ir_nodemap_get(&value_map, e);
143 return identify_remember(value_table, value);
148 * Return the block info of a block.
150 * @param block the block
152 static block_info *get_block_info(ir_node *block) {
153 return get_irn_link(block);
154 } /* get_block_info */
157 * Allocate a block info for a block.
159 * @param block the block
160 * @param env the environment
162 static void alloc_blk_info(ir_node *block, pre_env *env) {
163 block_info *info = obstack_alloc(env->obst, sizeof(*info));
165 set_irn_link(block, info);
166 info->exp_gen = ir_valueset_new(16);
167 info->avail_out = ir_valueset_new(16);
168 info->antic_in = ir_valueset_new(16);
169 info->new_set = NULL;
172 info->next = env->list;
175 } /* alloc_blk_info */
178 * Returns non-zero if a node is movable and a possible candidate for PRE.
182 static int is_nice_value(ir_node *n) {
186 n = get_Proj_pred(n);
187 if (get_irn_pinned(n) == op_pin_state_pinned)
189 mode = get_irn_mode(n);
190 if (!mode_is_data(mode)) {
191 if (! is_Div(n) && ! is_Mod(n) && ! is_DivMod(n))
193 if (! is_NoMem(get_fragile_op_mem(n)))
197 } /* is_nice_value */
203 * @param set the set to dump
204 * @param txt a text to describe the set
205 * @param block the owner block of the set
207 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block) {
208 ir_valueset_iterator_t iter;
209 ir_node *value, *expr;
212 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
214 foreach_valueset(set, value, expr, iter) {
216 DB((dbg, LEVEL_2, "\n"));
218 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
220 DB((dbg, LEVEL_2, " %+F,", expr));
223 DB((dbg, LEVEL_2, "\n}\n"));
224 } /* dump_value_set */
227 #define dump_value_set(set, txt, block)
228 #endif /* DEBUG_libfirm */
231 * Topological walker. Allocates block info for every block and place nodes in topological
232 * order into the nodes set.
234 static void topo_walker(ir_node *irn, void *ctx) {
241 /* the topological walker ensures that blocks are visited before anything else */
242 alloc_blk_info(irn, env);
245 /* GVN step: remember the value */
246 value = add(irn, irn);
248 /* no need to put constants into the sets: they are always redundant */
249 if (! is_nice_value(irn) || is_irn_constlike(irn))
252 /* Do not put mode_T nodes info the sets, or PhiT will be created
253 (which are not allowed in Firm). Instead, put the Proj's here only. */
254 if (get_irn_mode(irn) == mode_T)
257 /* place this node into the set of possible nodes of its block */
258 block = get_nodes_block(irn);
259 info = get_block_info(block);
261 ir_valueset_insert(info->exp_gen, value, irn);
265 * Computes Avail_out(block):
267 * Avail_in(block) = Avail_out(dom(block))
268 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
271 * This function must be called in the top-down dominance order:
272 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
274 * @param block the block
275 * @param ctx walker context
277 static void compute_avail_top_down(ir_node *block, void *ctx)
280 block_info *dom_info;
281 block_info *info = get_block_info(block);
284 /* we don't need the end block Avail */
285 if (block == env->end_block)
289 * First add all nodes from the dominator.
290 * This must be done to ensure that Antic_out contains the leader
291 * for every node. The root has no dominator.
293 if (block != env->start_block) {
294 dom_blk = get_Block_idom(block);
295 assert(is_Block(dom_blk));
297 dom_info = get_block_info(dom_blk);
300 value_union(info->avail_out, dom_info->avail_out);
302 value_union(info->avail_out, info->exp_gen);
304 dump_value_set(info->avail_out, "Avail_out", block);
305 } /* compute_avail_top_down */
308 * check if a node n is clean in block block.
311 * @param block the block
312 * @param set a value set, containing the already processed predecessors
314 static int is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *set) {
320 if (! is_nice_value(n))
323 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
324 ir_node *pred = get_irn_n(n, i);
327 if (get_nodes_block(pred) != block)
333 if (! is_nice_value(pred))
336 value = lookup(pred);
339 if (! ir_valueset_lookup(set, value))
343 } /* is_clean_in_block */
346 * Implements phi_translate. Translate a expression above a Phi.
348 * @param node the node
349 * @param block the block in which the node is translated
350 * @param pos the input number of the destination block
351 * @param translated the valueset containing the other already translated nodes
353 * @return a node representing the translated value
355 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *translated)
361 if (get_nodes_block(node) == block) {
362 /* a Phi inside our block */
363 return get_Phi_pred(node, pos);
365 /* already outside */
369 arity = get_irn_intra_arity(node);
371 /* check if the node has at least one Phi predecessor */
372 for (i = 0; i < arity; ++i) {
373 ir_node *pred = get_irn_intra_n(node, i);
374 ir_node *leader = lookup(pred);
377 leader = leader != NULL ? leader : pred;
378 trans = ir_valueset_lookup(translated, leader);
380 if ((trans != NULL && trans != leader) || (is_Phi(leader) && get_nodes_block(leader) == block))
384 /* no translation needed */
389 get_irn_dbg_info(node),
396 /* We need the attribute copy here, because the Hash value of a
397 node might depend on that. */
398 copy_node_attr(node, nn);
400 set_nodes_block(nn, get_nodes_block(node));
401 for (i = 0; i < arity; ++i) {
402 ir_node *pred = get_irn_intra_n(node, i);
403 ir_node *leader = lookup(pred);
406 leader = leader != NULL ? leader : pred;
407 trans = ir_valueset_lookup(translated, leader);
411 if (is_Phi(trans) && get_nodes_block(trans) == block)
412 set_irn_n(nn, i, get_Phi_pred(trans, pos));
414 set_irn_n(nn, i, trans);
416 nn = optimize_node(nn);
418 } /* phi_translate */
421 * Block-walker, computes Antic_in(block).
423 * @param block the block
424 * @param ctx the walker environment
426 static void compute_antic(ir_node *block, void *ctx) {
428 block_info *succ_info;
429 block_info *info = get_block_info(block);
430 ir_node *succ, *value, *expr;
432 ir_valueset_iterator_t iter;
434 /* no need for computations in start block */
435 if (block == env->start_block)
438 size = ir_valueset_size(info->antic_in);
440 /* the end block has no successor */
441 if (block != env->end_block) {
445 * This step puts all generated expression from the current
446 * current block into Antic_in.
447 * It is enough to do this in the first iteration only, because
448 * the set info->exp_gen is not changed anymore.
450 if (env->first_iter) {
451 foreach_valueset(info->exp_gen, value, expr, iter) {
452 ir_valueset_insert(info->antic_in, value, expr);
456 n_succ = get_Block_n_cfg_outs(block);
460 /* find blocks position in succ's block predecessors */
461 succ = get_Block_cfg_out(block, 0);
462 pos = get_Block_cfgpred_pos(succ, block);
465 succ_info = get_block_info(succ);
466 /* translate into list: we cannot insert into a set we iterate
467 * and succ might be equal to block for endless loops */
468 foreach_valueset(succ_info->antic_in, value, expr, iter) {
469 ir_node *trans = phi_translate(expr, succ, pos, info->antic_in);
471 if (is_clean_in_block(trans, block, info->antic_in))
472 ir_valueset_insert(info->antic_in, value, trans);
474 } else { /* n_succ > 1 */
476 block_info *succ0_info;
481 /* Select a successor to compute the disjoint of all Nodes
482 sets, it might be useful to select the block with the
483 smallest number of nodes. For simplicity we choose the
485 succ0 = get_Block_cfg_out(block, 0);
486 succ0_info = get_block_info(succ0);
487 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
488 /* we need the disjoint */
489 for (i = 1; i < n_succ; ++i) {
490 ir_node *succ = get_Block_cfg_out(block, i);
491 block_info *succ_info = get_block_info(succ);
492 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
496 /* we found a value that is common in all Antic_in(succ(b)),
497 put it in Antic_in(b) if the value is NOT already represented. */
498 if (is_clean_in_block(expr, block, info->antic_in))
499 ir_valueset_insert(info->antic_in, value, expr);
505 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
506 * and all other sets */
508 dump_value_set(info->antic_in, "Antic_in", block);
509 if (size != ir_valueset_size(info->antic_in)) {
510 /* the Antic_in set has changed */
513 } /* compute_antic */
516 * Perform insertion of partially redundant values.
517 * For every Block node, do the following:
518 * 1. Propagate the NEW_SETS of the dominator into the current block.
519 * If the block has multiple predecessors,
520 * 2a. Iterate over the ANTIC expressions for the block to see if
521 * any of them are partially redundant.
522 * 2b. If so, insert them into the necessary predecessors to make
523 * the expression fully redundant.
524 * 2c. Insert a new Phi merging the values of the predecessors.
525 * 2d. Insert the new Phi, and the new expressions, into the
528 * @param block the block
529 * @param ctx the walker environment
531 static void insert_nodes(ir_node *block, void *ctx)
534 ir_node *value, *expr, *idom, *first_s, *worklist;
535 block_info *curr_info, *idom_info;
536 int pos, arity = get_irn_intra_arity(block);
537 int all_same, by_some, updated;
538 ir_valueset_iterator_t iter;
540 /* ensure that even the start block has a new_set */
541 curr_info = get_block_info(block);
542 if (curr_info->new_set)
543 ir_valueset_del(curr_info->new_set);
544 curr_info->new_set = ir_valueset_new(16);
546 if (block == env->start_block)
549 idom = get_Block_idom(block);
550 idom_info = get_block_info(idom);
552 /* update the new_sets */
554 dump_value_set(idom_info->new_set, "[New Set]", idom);
555 foreach_valueset(idom_info->new_set, value, expr, iter) {
556 ir_valueset_insert(curr_info->new_set, value, expr);
557 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
560 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
566 /* convert the set into a list. This allows the removal of
567 * elements from the set */
569 foreach_valueset(curr_info->antic_in, value, expr, iter) {
572 /* If the value was already computed in the dominator, then
573 it is totally redundant. Hence we have nothing to insert. */
574 if (ir_valueset_lookup(idom_info->avail_out, value)) {
575 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
584 /* for all predecessor blocks */
585 for (pos = 0; pos < arity; ++pos) {
586 block_info *pred_info;
587 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
588 ir_node *e_prime, *v_prime, *e_dprime;
590 /* ignore bad blocks. */
591 if (is_Bad(pred_blk))
594 e_prime = phi_translate(expr, block, pos, curr_info->avail_out);
595 v_prime = lookup(e_prime);
599 pred_info = get_block_info(pred_blk);
600 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
602 if (e_dprime == NULL) {
603 pred_info->avail = e_prime;
604 pred_info->not_found = 1;
607 pred_info->avail = e_dprime;
608 pred_info->not_found = 0;
609 mode = get_irn_mode(e_dprime);
614 else if (first_s != e_dprime)
617 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
621 /* If it's not the same value already existing along every predecessor, and
622 it's defined by some predecessor, it is partially redundant. */
623 if (! all_same && by_some) {
624 ir_node *phi, *l, **in;
626 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
628 in = XMALLOCN(ir_node*, arity);
629 /* for all predecessor blocks */
630 for (pos = 0; pos < arity; ++pos) {
631 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
632 block_info *pred_info = get_block_info(pred_blk);
634 /* ignore bad blocks. */
635 if (is_Bad(pred_blk)) {
640 /* ignore blocks that already have the expression */
641 if (pred_info->not_found) {
642 ir_node *e_prime = pred_info->avail;
644 if (!is_Phi(e_prime)) {
645 ir_node *proj_pred = NULL;
646 if (is_Proj(e_prime)) {
647 ir_node *pred = get_Proj_pred(e_prime);
648 mode = get_irn_mode(pred);
650 get_irn_dbg_info(pred),
651 current_ir_graph, pred_blk,
655 get_irn_in(pred) + 1);
656 copy_node_attr(pred, nn);
658 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
661 mode = get_irn_mode(e_prime);
663 get_irn_dbg_info(e_prime),
664 current_ir_graph, pred_blk,
667 get_irn_arity(e_prime),
668 get_irn_in(e_prime) + 1);
669 copy_node_attr(e_prime, nn);
670 if (proj_pred != NULL) {
671 set_Proj_pred(nn, proj_pred);
674 DB((dbg, LEVEL_1, "New node %+F in block %+F created\n", nn, pred_blk));
677 l = add(expr, value);
679 ir_valueset_insert(pred_info->avail_out, add(nn, l), nn);
680 pred_info->avail = nn;
683 in[pos] = pred_info->avail;
685 phi = new_r_Phi(block, arity, in, mode);
688 l = add(expr, value);
691 ir_valueset_replace(curr_info->avail_out, value, phi);
692 ir_valueset_insert(curr_info->new_set, value, phi);
694 DB((dbg, LEVEL_1, "New %+F for redundant %+F created\n", phi, expr));
695 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
698 } /* node_set_foreach */
702 * Walker, change nodes by its value if different.
704 * We cannot do the changes right here, as this would change
705 * the hash values of the nodes in the avail_out set!
707 * @param irn the node
708 * @param ctx the walker environment
710 static void eliminate(ir_node *irn, void *ctx) {
713 if (is_no_Block(irn)) {
714 ir_node *block = get_nodes_block(irn);
715 block_info *bl = get_block_info(block);
716 ir_node *value = lookup(irn);
719 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
721 if (expr != NULL && expr != irn) {
722 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
726 p->next = env->pairs;
727 p->reason = get_irn_idx(expr) >= env->last_idx ? FS_OPT_GVN_PARTLY : FS_OPT_GVN_FULLY;
735 * Do all the recorded changes and optimize
736 * newly created Phi's.
738 * @param pairs list of elimination pairs
740 static void eliminate_nodes(elim_pair *pairs) {
743 for (p = pairs; p != NULL; p = p->next) {
744 /* might be already changed */
745 p->new_node = skip_Id(p->new_node);
747 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
749 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
750 * which we can optimize here
752 if (is_Phi(p->new_node)) {
756 for (i = get_irn_intra_arity(p->new_node) - 1; i >= 0; --i) {
757 ir_node *pred = get_irn_n(p->new_node, i);
759 if (pred != p->old_node) {
768 exchange(p->new_node, res);
772 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
773 exchange(p->old_node, p->new_node);
775 } /* eliminate_nodes */
778 * Argh: Endless loops cause problems, because the
779 * insert algorithm did not terminate. We get translated nodes that
780 * references the origin. These nodes are translated again and again...
782 * The current fix is to use post-dominance. This simple ignores
783 * endless loops, ie we cannot optimize them.
785 void do_gvn_pre(ir_graph *irg)
789 optimization_state_t state;
791 unsigned antic_iter, insert_iter;
792 ir_node *value, *expr;
794 /* register a debug mask */
795 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
797 /* edges will crash if enabled due to our allocate on other obstack trick */
798 edges_deactivate(irg);
800 value_table = new_identities();
801 ir_nodemap_init(&value_map);
806 a_env.start_block = get_irg_start_block(irg);
807 a_env.end_block = get_irg_end_block(irg);
810 /* Move Proj's into the same block as their args,
811 else we would assign the result to wrong blocks */
812 normalize_proj_nodes(irg);
814 /* critical edges MUST be removed */
815 remove_critical_cf_edges(irg);
817 /* we need dominator for Antic_out calculation */
819 assure_postdoms(irg);
820 /* we get all nodes of a block by following outs */
821 assure_irg_outs(irg);
824 * Switch on GCSE. We need it to correctly compute
825 * the value of a node, which is independent from
828 save_optimization_state(&state);
829 set_opt_global_cse(1);
831 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
833 /* allocate block info for all blocks */
834 irg_walk_blkwise_dom_top_down(irg, NULL, topo_walker, &a_env);
836 /* clean the exp_gen set. Doing this here saves the cleanup in the iteration. */
837 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
838 ir_valueset_iterator_t iter;
840 foreach_valueset(bl_info->exp_gen, value, expr, iter) {
841 if (!is_clean_in_block(expr, bl_info->block, bl_info->exp_gen))
842 ir_valueset_remove_iterator(bl_info->exp_gen, &iter);
845 /* compute the available value sets for all blocks */
846 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
848 /* compute the anticipated value sets for all blocks */
850 a_env.first_iter = 1;
852 /* we use the visited flag to mark non-clean nodes */
853 inc_irg_visited(irg);
855 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
857 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
858 a_env.first_iter = 0;
859 DB((dbg, LEVEL_1, "------------------------\n"));
860 } while (a_env.changes != 0);
862 /* compute redundant expressions */
864 a_env.last_idx = get_irg_last_idx(irg);
866 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
868 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
869 DB((dbg, LEVEL_1, "------------------------\n"));
870 } while (a_env.changes != 0);
872 /* last step: eliminate nodes */
873 irg_walk_graph(irg, NULL, eliminate, &a_env);
874 eliminate_nodes(a_env.pairs);
876 /* clean up: delete all sets */
877 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
878 ir_valueset_del(bl_info->exp_gen);
879 ir_valueset_del(bl_info->avail_out);
880 ir_valueset_del(bl_info->antic_in);
881 if (bl_info->new_set)
882 ir_valueset_del(bl_info->new_set);
884 del_identities(value_table);
885 ir_nodemap_destroy(&value_map);
886 obstack_free(&obst, NULL);
889 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
890 set_irg_pinned(irg, op_pin_state_pinned);
891 restore_optimization_state(&state);
894 set_irg_outs_inconsistent(irg);
895 set_irg_loopinfo_inconsistent(irg);
899 /* Creates an ir_graph pass for do_gvn_pre. */
900 ir_graph_pass_t *do_gvn_pre_pass(const char *name, int verify, int dump)
902 return def_graph_pass(name ? name : "gvn_pre", verify, dump, do_gvn_pre);
903 } /* do_gvn_pre_pass */