2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
40 #include "irnodemap.h"
41 #include "irnodeset.h"
45 #include "irgraph_t.h"
49 /** Additional info we need for every block. */
50 typedef struct block_info {
51 ir_valueset_t *exp_gen; /**< The set of expression per block. */
52 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
53 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
54 ir_valueset_t *new_set; /**< The set of all new values for a block. */
55 ir_node *avail; /**< The get_map(avail, block) result. */
56 int not_found; /**< Non-zero, if avail was not found in this block. */
57 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
61 * A pair of nodes that must be exchanged.
62 * We must defer the exchange because our hash-sets cannot
63 * find an already replace node else.
65 typedef struct elim_pair {
66 ir_node *old_node; /**< The old node that will be replaced. */
67 ir_node *new_node; /**< The new node. */
68 struct elim_pair *next; /**< Links all entries in a list. */
71 /** The environment for the GVN-PRE algorithm */
72 typedef struct pre_env {
73 struct obstack *obst; /**< The obstack to allocate on. */
74 ir_node *start_block; /**< The start block of the current graph. */
75 ir_node *end_block; /**< The end block of the current graph */
76 block_info *list; /**< Links all block info entires for easier recovery. */
77 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
78 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
79 char first_iter; /**< non-zero for first iteration */
82 static pset *value_table;
83 static ir_nodemap_t value_map;
85 /** The debug module handle. */
86 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
88 /* ---------- Functions for Value sets ---------- */
90 /** computes dst = dst \/ src for value sets */
91 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
93 ir_valueset_iterator_t iter;
94 ir_node *value, *expr;
96 foreach_valueset(src, value, expr, iter) {
97 ir_valueset_insert(dst, value, expr);
102 /* ---------- Functions for Values ---------- */
105 * Add a node node representing the value value to the set.
107 static ir_node *add(ir_node *e, ir_node *v)
110 ir_node *pred = get_Proj_pred(v);
111 ir_node *v_pred = identify_remember(value_table, pred);
113 if (v_pred != pred) {
114 /* must create a new value here */
115 v = new_r_Proj(current_ir_graph, get_nodes_block(v_pred), v_pred, get_irn_mode(v), get_Proj_proj(v));
118 v = identify_remember(value_table, v);
119 ir_nodemap_insert(&value_map, e, v);
124 * Lookup a value in a value set.
126 static ir_node *lookup(ir_node *e)
128 ir_node *value = ir_nodemap_get(&value_map, e);
130 return identify_remember(value_table, value);
135 * Return the block info of a block
137 static block_info *get_block_info(ir_node *block) {
138 return get_irn_link(block);
142 * allocate a block info
144 static void alloc_blk_info(ir_node *block, pre_env *env) {
145 block_info *info = obstack_alloc(env->obst, sizeof(*info));
147 set_irn_link(block, info);
148 info->exp_gen = ir_valueset_new(16);
149 info->avail_out = ir_valueset_new(16);
150 info->antic_in = ir_valueset_new(16);
151 info->new_set = NULL;
154 info->next = env->list;
159 * Returns non-zero if a node is movable and a possible candidate for PRE.
161 static int is_nice_value(ir_node *n) {
165 n = get_Proj_pred(n);
166 if (get_irn_pinned(n) == op_pin_state_pinned)
168 mode = get_irn_mode(n);
169 if (!mode_is_data(mode)) {
170 if (! is_Div(n) && ! is_Mod(n) && ! is_DivMod(n))
172 if (! is_NoMem(get_fragile_op_mem(n)))
176 } /* is_nice_value */
182 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block) {
183 ir_valueset_iterator_t iter;
184 ir_node *value, *expr;
187 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
189 foreach_valueset(set, value, expr, iter) {
191 DB((dbg, LEVEL_2, "\n"));
193 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
195 DB((dbg, LEVEL_2, " %+F,", expr));
198 DB((dbg, LEVEL_2, "\n}\n"));
199 } /* dump_value_set */
202 #define dump_value_set(set, txt, block)
203 #endif /* DEBUG_libfirm */
206 * Topological walker. Allocates block info for every block and place nodes in topological
207 * order into the nodes set.
209 static void topo_walker(ir_node *irn, void *ctx) {
216 /* the topological walker ensures that blocks are visited before anything else */
217 alloc_blk_info(irn, env);
220 /* GVN step: remember the value */
221 value = add(irn, irn);
223 /* no need to put constants into the sets: they are always redundant */
224 if (! is_nice_value(irn) || is_irn_constlike(irn))
227 /* Do not put mode_T nodes info the sets, or PhiT will be created
228 (which are not allowed in Firm). Instead, put the Proj's here only. */
229 if (get_irn_mode(irn) == mode_T)
232 /* place this node into the set of possible nodes of its block */
233 block = get_nodes_block(irn);
234 info = get_block_info(block);
236 ir_valueset_insert(info->exp_gen, value, irn);
240 * Computes Avail_out(block):
242 * Avail_in(block) = Avail_out(dom(block))
243 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
246 * This function must be called in the top-down dominance order:
247 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
249 static void compute_avail_top_down(ir_node *block, void *ctx)
252 block_info *dom_info;
253 block_info *info = get_block_info(block);
256 /* we don't need the end block Avail */
257 if (block == env->end_block)
261 * First add all nodes from the dominator.
262 * This must be done to ensure that Antic_out contains the leader
263 * for every node. The root has no dominator.
265 if (block != env->start_block) {
266 dom_blk = get_Block_idom(block);
267 assert(is_Block(dom_blk));
269 dom_info = get_block_info(dom_blk);
272 value_union(info->avail_out, dom_info->avail_out);
274 value_union(info->avail_out, info->exp_gen);
276 dump_value_set(info->avail_out, "Avail_out", block);
280 * check if a node n is clean in block block.
282 static int _is_clean(ir_node *n, ir_node *block)
286 if (get_nodes_block(n) != block)
294 if (! is_nice_value(n))
296 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
297 ir_node *pred = get_irn_n(n, i);
298 if (! _is_clean(pred, block))
308 * check if a node n is clean.
310 static int is_clean(ir_node *n) {
311 int res = _is_clean(n, get_nodes_block(n));
316 * Implements phi_translate.
318 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, pre_env *env)
325 if (get_nodes_block(node) == block) {
326 /* a Phi inside our block */
327 return get_Phi_pred(node, pos);
329 /* already outside */
333 arity = get_irn_intra_arity(node);
335 /* check if the node has at least one Phi predecessor */
336 for (i = 0; i < arity; ++i) {
337 ir_node *pred = get_irn_intra_n(node, i);
338 ir_node *leader = lookup(pred);
340 leader = leader != NULL ? leader : pred;
341 if (is_Phi(leader) && get_nodes_block(pred) == block)
345 /* no Phi in the predecessors */
349 /* Create a copy of the node in the pos'th predecessor block.
350 Use our environmental obstack, as these nodes are always
352 old = current_ir_graph->obst;
353 current_ir_graph->obst = env->obst;
355 get_irn_dbg_info(node),
362 /* We need the attribute copy here, because the Hash value of a
363 node might depend on that. */
364 copy_node_attr(node, nn);
366 set_nodes_block(nn, get_nodes_block(node));
367 for (i = 0; i < arity; ++i) {
368 ir_node *pred = get_irn_intra_n(node, i);
369 ir_node *leader = lookup(pred);
371 leader = leader != NULL ? leader : pred;
372 if (is_Phi(leader) && get_irn_intra_n(pred, -1) == block)
373 set_irn_n(nn, i, get_Phi_pred(leader, pos));
375 set_irn_n(nn, i, leader);
377 current_ir_graph->obst = old;
379 } /* phi_translate */
382 * computes Antic_in(block):
384 static void compute_antic(ir_node *block, void *ctx) {
386 block_info *succ_info;
387 block_info *info = get_block_info(block);
388 ir_node *succ, *value, *expr;
390 ir_valueset_iterator_t iter;
392 /* no need for computations in start block */
393 if (block == env->start_block)
396 size = ir_valueset_size(info->antic_in);
398 /* the end block has no successor */
399 if (block != env->end_block) {
403 * This step puts all generated expression from the current
404 * current block into Antic_in.
405 * It is enough to do this in the first iteration only, because
406 * the set info->exp_gen is not changed anymore.
408 if (env->first_iter) {
409 foreach_valueset(info->exp_gen, value, expr, iter) {
410 ir_valueset_insert(info->antic_in, value, expr);
414 n_succ = get_Block_n_cfg_outs(block);
418 /* find blocks position in succ's block predecessors */
419 succ = get_Block_cfg_out(block, 0);
420 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
421 if (get_Block_cfgpred_block(succ, i) == block) {
428 succ_info = get_block_info(succ);
429 /* translate into list: we cannot insert into a set we iterate
430 * and succ might be equal to block for endless loops */
431 foreach_valueset(succ_info->antic_in, value, expr, iter) {
432 ir_node *trans = phi_translate(expr, succ, pos, env);
435 ir_valueset_insert(info->antic_in, value, trans);
437 } else { /* n_succ > 1 */
439 block_info *succ0_info;
444 /* Select a successor to compute the disjoint of all Nodes
445 sets, it might be useful to select the block with the
446 smallest number of nodes. For simplicity we choose the
448 succ0 = get_Block_cfg_out(block, 0);
449 succ0_info = get_block_info(succ0);
450 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
451 /* we need the disjoint */
452 for (i = 1; i < n_succ; ++i) {
453 ir_node *succ = get_Block_cfg_out(block, i);
454 block_info *succ_info = get_block_info(succ);
455 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
459 /* we found a value that is common in all Antic_in(succ(b)),
460 put it in Antic_in(b) if the value is NOT already represented. */
461 ir_valueset_insert(info->antic_in, value, expr);
467 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
468 * and all other sets */
470 dump_value_set(info->antic_in, "Antic_in", block);
471 if (size != ir_valueset_size(info->antic_in)) {
472 /* the Antic_in set has changed */
475 } /* compute_antic */
478 * Perform insertion of partially redundant values.
479 * For every Block node, do the following:
480 * 1. Propagate the NEW_SETS of the dominator into the current block.
481 * If the block has multiple predecessors,
482 * 2a. Iterate over the ANTIC expressions for the block to see if
483 * any of them are partially redundant.
484 * 2b. If so, insert them into the necessary predecessors to make
485 * the expression fully redundant.
486 * 2c. Insert a new Phi merging the values of the predecessors.
487 * 2d. Insert the new Phi, and the new expressions, into the
490 static void insert_nodes(ir_node *block, void *ctx)
493 ir_node *value, *expr, *idom, *first_s, *worklist;
494 block_info *curr_info, *idom_info;
495 int pos, arity = get_irn_intra_arity(block);
496 int all_same, by_some, updated;
497 ir_valueset_iterator_t iter;
499 /* ensure that even the start block has a new_set */
500 curr_info = get_block_info(block);
501 if (curr_info->new_set)
502 ir_valueset_del(curr_info->new_set);
503 curr_info->new_set = ir_valueset_new(16);
505 if (block == env->start_block)
508 idom = get_Block_idom(block);
509 idom_info = get_block_info(idom);
511 /* update the new_sets */
513 dump_value_set(idom_info->new_set, "[New Set]", idom);
514 foreach_valueset(idom_info->new_set, value, expr, iter) {
515 ir_valueset_insert(curr_info->new_set, value, expr);
516 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
519 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
525 /* convert the set into a list. This allows the removal of
526 * elements from the set */
528 foreach_valueset(curr_info->antic_in, value, expr, iter) {
531 /* If the value was already computed in the dominator, then
532 it is totally redundant. Hence we have nothing to insert. */
533 if (ir_valueset_lookup(idom_info->avail_out, value)) {
534 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
543 /* for all predecessor blocks */
544 for (pos = 0; pos < arity; ++pos) {
545 block_info *pred_info;
546 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
547 ir_node *e_prime, *v_prime, *e_dprime;
549 /* ignore bad blocks. */
550 if (is_Bad(pred_blk))
553 e_prime = phi_translate(expr, block, pos, env);
554 v_prime = lookup(e_prime);
558 pred_info = get_block_info(pred_blk);
559 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
561 if (e_dprime == NULL) {
562 pred_info->avail = e_prime;
563 pred_info->not_found = 1;
566 pred_info->avail = e_dprime;
567 pred_info->not_found = 0;
568 mode = get_irn_mode(e_dprime);
573 else if (first_s != e_dprime)
576 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
580 /* If it's not the same value already existing along every predecessor, and
581 it's defined by some predecessor, it is partially redundant. */
582 if (! all_same && by_some) {
585 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
587 in = xmalloc(arity * sizeof(*in));
588 /* for all predecessor blocks */
589 for (pos = 0; pos < arity; ++pos) {
590 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
591 block_info *pred_info = get_block_info(pred_blk);
593 /* ignore bad blocks. */
594 if (is_Bad(pred_blk)) {
599 /* ignore blocks that already have the expression */
600 if (pred_info->not_found) {
601 ir_node *e_prime = pred_info->avail;
603 if (!is_Phi(e_prime)) {
604 ir_node *proj_pred = NULL;
605 if (is_Proj(e_prime)) {
606 ir_node *pred = get_Proj_pred(e_prime);
607 mode = get_irn_mode(pred);
609 get_irn_dbg_info(pred),
610 current_ir_graph, pred_blk,
614 get_irn_in(pred) + 1);
615 copy_node_attr(pred, nn);
617 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
620 mode = get_irn_mode(e_prime);
622 get_irn_dbg_info(e_prime),
623 current_ir_graph, pred_blk,
626 get_irn_arity(e_prime),
627 get_irn_in(e_prime) + 1);
628 copy_node_attr(e_prime, nn);
629 if (proj_pred != NULL) {
630 set_Proj_pred(nn, proj_pred);
633 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
634 ir_valueset_insert(pred_info->avail_out, add(nn, lookup(expr)), nn);
635 pred_info->avail = nn;
638 in[pos] = pred_info->avail;
640 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
641 value = add(phi, lookup(expr));
642 ir_valueset_replace(curr_info->avail_out, value, phi);
643 ir_valueset_insert(curr_info->new_set, value, phi);
645 DB((dbg, LEVEL_2, "New %+F for redundant %+F created\n", phi, expr));
646 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
649 } /* node_set_foreach */
653 * Walker, change nodes by its value if different.
655 * We cannot do the changes right here, as this would change
656 * the hash values of the nodes in the avail_out set!
658 static void eliminate(ir_node *irn, void *ctx) {
661 if (is_no_Block(irn)) {
662 ir_node *block = get_nodes_block(irn);
663 block_info *bl = get_block_info(block);
664 ir_node *value = lookup(irn);
667 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
669 if (expr != NULL && expr != irn) {
670 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
674 p->next = env->pairs;
682 * Do all the recorded changes and optimize
683 * newly created Phi's.
685 static void eliminate_nodes(elim_pair *pairs) {
688 for (p = pairs; p != NULL; p = p->next) {
689 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
691 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
692 * which we can optimize here
694 if (is_Phi(p->new_node)) {
698 for (i = get_irn_intra_arity(p->new_node) - 1; i >= 0; --i) {
699 ir_node *pred = get_irn_n(p->new_node, i);
701 if (pred != p->old_node) {
712 exchange(p->old_node, p->new_node);
714 } /* eliminate_nodes */
717 * Argh: Endless loops cause problems, because the
718 * insert algorithm did not terminate. We get translated nodes that
719 * references the origin. These nodes are translated again and again...
721 * The current fix is to use post-dominance. This simple ignores
722 * endless loops, ie we cannot optimize them.
724 void do_gvn_pre(ir_graph *irg)
728 optimization_state_t state;
730 unsigned antic_iter, insert_iter;
731 ir_node *value, *expr;
733 /* register a debug mask */
734 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
736 /* edges will crash if enabled due to our allocate on other obstack trick */
737 edges_deactivate(irg);
739 value_table = new_identities();
740 ir_nodemap_init(&value_map);
745 a_env.start_block = get_irg_start_block(irg);
746 a_env.end_block = get_irg_end_block(irg);
749 /* Move Proj's into the same block as their args,
750 else we would assign the result to wrong blocks */
751 normalize_proj_nodes(irg);
753 /* critical edges MUST be removed */
754 remove_critical_cf_edges(irg);
756 /* we need dominator for Antic_out calculation */
758 assure_postdoms(irg);
759 /* we get all nodes of a block by following outs */
760 assure_irg_outs(irg);
763 * Switch on GCSE. We need it to correctly compute
764 * the leader of a node by hashing.
766 save_optimization_state(&state);
767 set_opt_global_cse(1);
769 DB((dbg, LEVEL_1, "Doing GVN-PRE for %e\n", get_irg_entity(irg)));
771 /* allocate block info for all blocks */
772 irg_walk_blkwise_graph(irg, NULL, topo_walker, &a_env);
774 /* clean the exp_gen set. Doing this here saves the cleanup in the iteration. */
775 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
776 ir_valueset_iterator_t iter;
778 foreach_valueset(bl_info->exp_gen, value, expr, iter) {
780 ir_valueset_remove_iterator(bl_info->exp_gen, &iter);
784 /* compute the available value sets for all blocks */
785 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
787 /* compute the anticipated value sets for all blocks */
789 a_env.first_iter = 1;
791 /* we use the visited flag to mark non-clean nodes */
792 inc_irg_visited(irg);
794 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
796 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
797 a_env.first_iter = 0;
798 DB((dbg, LEVEL_1, "------------------------\n"));
799 } while (a_env.changes != 0);
801 /* compute redundant expressions */
804 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
806 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
807 DB((dbg, LEVEL_1, "------------------------\n"));
808 } while (a_env.changes != 0);
810 /* last step: eliminate nodes */
811 irg_walk_graph(irg, NULL, eliminate, &a_env);
812 eliminate_nodes(a_env.pairs);
814 /* clean up: delete all sets */
815 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
816 ir_valueset_del(bl_info->exp_gen);
817 ir_valueset_del(bl_info->avail_out);
818 ir_valueset_del(bl_info->antic_in);
819 if (bl_info->new_set)
820 ir_valueset_del(bl_info->new_set);
822 del_identities(value_table);
823 ir_nodemap_destroy(&value_map);
824 obstack_free(&obst, NULL);
827 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
828 set_irg_pinned(irg, op_pin_state_pinned);
829 restore_optimization_state(&state);
832 set_irg_outs_inconsistent(irg);
833 set_irg_loopinfo_inconsistent(irg);