2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
40 #include "irnodemap.h"
41 #include "irnodeset.h"
44 #include "irgraph_t.h"
48 /** Additional info we need for every block. */
49 typedef struct block_info {
50 ir_valueset_t *exp_gen; /**< The set of expression per block. */
51 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
52 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
53 ir_valueset_t *new_set; /**< The set of all new values for a block. */
54 ir_node *avail; /**< The get_map(avail, block) result. */
55 int not_found; /**< Non-zero, if avail was not found in this block. */
56 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
60 * A pair of nodes that must be exchanged.
61 * We must defer the exchange because our hash-sets cannot
62 * find an already replace node else.
64 typedef struct elim_pair {
65 ir_node *old_node; /**< The old node that will be replaced. */
66 ir_node *new_node; /**< The new node. */
67 struct elim_pair *next; /**< Links all entries in a list. */
70 /** The environment for the GVN-PRE algorithm */
71 typedef struct pre_env {
72 struct obstack *obst; /**< The obstack to allocate on. */
73 ir_node *start_block; /**< The start block of the current graph. */
74 ir_node *end_block; /**< The end block of the current graph */
75 pset *trans_set; /**< The set of all translated values. */
76 block_info *list; /**< Links all block info entires for easier recovery. */
77 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
78 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
79 char first_iter; /**< non-zero for first iteration */
82 static pset *value_table;
83 static ir_nodemap_t value_map;
85 /** The debug module handle. */
86 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
88 /* ---------- Functions for Value sets ---------- */
90 /** computes dst = dst \/ src for value sets */
91 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
93 ir_valueset_iterator_t iter;
94 ir_node *value, *expr;
96 foreach_valueset(src, value, expr, iter) {
97 ir_valueset_insert(dst, value, expr);
102 /* ---------- Functions for Values ---------- */
105 * Add a node node representing the value value to the set.
107 static ir_node *add(ir_node *e, ir_node *v)
109 v = identify_remember(value_table, v);
110 ir_nodemap_insert(&value_map, e, v);
115 * Lookup a value in a value set.
117 static ir_node *lookup(ir_node *e)
119 ir_node *value = ir_nodemap_get(&value_map, e);
121 return identify_remember(value_table, value);
126 * Add or replace a value in a set by an node computing the same
127 * value in a dominator block.
129 static ir_node *lookup_or_add(ir_node *e)
131 ir_node *x = lookup(e);
141 * Return the block info of a block
143 static block_info *get_block_info(ir_node *block) {
144 return get_irn_link(block);
148 * allocate a block info
150 static void alloc_blk_info(ir_node *block, pre_env *env) {
151 block_info *info = obstack_alloc(env->obst, sizeof(*info));
153 set_irn_link(block, info);
154 info->exp_gen = ir_valueset_new(16);
155 info->avail_out = ir_valueset_new(16);
156 info->antic_in = ir_valueset_new(16);
157 info->new_set = NULL;
160 info->next = env->list;
165 * Returns non-zero if a node is movable and a possible candidate for PRE.
167 static int is_nice_value(ir_node *n) {
171 n = get_Proj_pred(n);
172 mode = get_irn_mode(n);
174 * FIXME: For now, we cannot handle Div/even if it's movable.
175 * That should be fixed.
177 if (!mode_is_data(mode))
179 return (get_irn_pinned(n) != op_pin_state_pinned);
180 } /* is_nice_value */
184 * Dump a ir_nodeset_t set.
186 static void dump_node_set(ir_nodeset_t *set, char *txt, ir_node *block)
188 ir_nodeset_iterator_t iter;
192 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
194 foreach_ir_nodeset(set, n, iter) {
196 DB((dbg, LEVEL_2, "\n"));
197 DB((dbg, LEVEL_2, " %+F,", n));
200 DB((dbg, LEVEL_2, "\n}\n"));
201 } /* dump_node_set */
206 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block) {
207 ir_valueset_iterator_t iter;
208 ir_node *value, *expr;
211 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
213 foreach_valueset(set, value, expr, iter) {
215 DB((dbg, LEVEL_2, "\n"));
217 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
219 DB((dbg, LEVEL_2, " %+F,", expr));
222 DB((dbg, LEVEL_2, "\n}\n"));
223 } /* dump_value_set */
227 #define dump_node_set(set, txt, block)
228 #define dump_value_set(set, txt, block)
229 #endif /* DEBUG_libfirm */
232 * Topological walker. Allocates block info for every block and place nodes in topological
233 * order into the nodes set.
235 static void topo_walker(ir_node *irn, void *ctx) {
242 /* the topological walker ensures that blocks are visited before anything else */
243 alloc_blk_info(irn, env);
246 /* GVN step: remember the value */
247 value = add(irn, irn);
249 /* no need to put constants into the sets: they are always redundant */
250 if (! is_nice_value(irn) || is_irn_constlike(irn))
253 /* place this node into the set of possible nodes of its block */
254 block = get_nodes_block(irn);
255 info = get_block_info(block);
257 ir_valueset_insert(info->exp_gen, value, irn);
261 * Computes Avail_out(block):
263 * Avail_in(block) = Avail_out(dom(block))
264 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
267 * This function must be called in the top-down dominance order:
268 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
270 static void compute_avail_top_down(ir_node *block, void *ctx)
273 block_info *dom_info;
274 block_info *info = get_block_info(block);
277 /* we don't need the end block Avail */
278 if (block == env->end_block)
282 * First add all nodes from the dominator.
283 * This must be done to ensure that Antic_out contains the leader
284 * for every node. The root has no dominator.
286 if (block != env->start_block) {
287 dom_blk = get_Block_idom(block);
288 assert(is_Block(dom_blk));
290 dom_info = get_block_info(dom_blk);
293 value_union(info->avail_out, dom_info->avail_out);
295 value_union(info->avail_out, info->exp_gen);
297 dump_value_set(info->avail_out, "Avail_out", block);
301 * check if a node n is clean in block block.
303 static int _is_clean(ir_node *n, ir_node *block)
307 if (get_nodes_block(n) != block)
315 if (! is_nice_value(n))
317 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
318 ir_node *pred = get_irn_n(n, i);
319 if (! _is_clean(pred, block))
329 * check if a node n is clean.
331 static int is_clean(ir_node *n) {
332 int res = _is_clean(n, get_nodes_block(n));
337 * Implements phi_translate.
339 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, pre_env *env)
346 if (get_nodes_block(node) == block) {
347 /* a Phi inside our block */
348 return get_Phi_pred(node, pos);
350 /* already outside */
354 arity = get_irn_intra_arity(node);
356 /* check if the node has at least one Phi predecessor */
357 for (i = 0; i < arity; ++i) {
358 ir_node *pred = get_irn_intra_n(node, i);
359 ir_node *pred_bl = get_nodes_block(pred);
360 ir_node *leader = lookup(pred);
362 leader = leader != NULL ? leader : pred;
363 if (is_Phi(leader) && get_nodes_block(pred) == block)
367 /* no Phi in the predecessors */
371 /* Create a copy of the node in the pos'th predecessor block.
372 Use our environmental obstack, as these nodes are always
374 old = current_ir_graph->obst;
375 current_ir_graph->obst = env->obst;
377 get_irn_dbg_info(node),
384 /* We need the attribute copy here, because the Hash value of a
385 node might depend on that. */
386 copy_node_attr(node, nn);
388 set_nodes_block(nn, get_nodes_block(node));
389 for (i = 0; i < arity; ++i) {
390 ir_node *pred = get_irn_intra_n(node, i);
391 ir_node *pred_bl = get_irn_intra_n(pred, -1);
392 ir_node *leader = lookup(pred);
394 leader = leader != NULL ? leader : pred;
395 if (is_Phi(leader) && get_irn_intra_n(pred, -1) == block)
396 set_irn_n(nn, i, get_Phi_pred(leader, pos));
398 set_irn_n(nn, i, leader);
400 current_ir_graph->obst = old;
402 res = identify_remember(env->trans_set, nn);
404 obstack_free(env->obst, nn);
406 DB((dbg, LEVEL_2, "--> Translate %+F in <%+F,%d> into %+F\n", node, block, pos, res));
409 } /* phi_translate */
412 * computes Antic_in(block):
414 static void compute_antic(ir_node *block, void *ctx) {
416 block_info *succ_info;
417 block_info *info = get_block_info(block);
418 ir_node *succ, *value, *expr;
420 ir_valueset_iterator_t iter;
422 /* no need for computations in start block */
423 if (block == env->start_block)
426 size = ir_valueset_size(info->antic_in);
428 /* the end block has no successor */
429 if (block != env->end_block) {
430 int n_succ = get_Block_n_cfg_outs(block);
435 /* find blocks position in succ's block predecessors */
436 succ = get_Block_cfg_out(block, 0);
437 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
438 if (get_Block_cfgpred_block(succ, i) == block) {
445 succ_info = get_block_info(succ);
446 /* translate into list: we cannot insert into a set we iterate
447 * and succ might be equal to block for endless loops */
448 foreach_valueset(succ_info->antic_in, value, expr, iter) {
449 ir_node *trans = phi_translate(expr, succ, pos, env);
452 ir_valueset_insert(info->antic_in, value, trans);
454 } else { /* n_succ > 1 */
456 block_info *succ0_info;
462 * This step puts all generated expression from the current
463 * current block into Antic_in.
464 * It is enough to do this in the first iteration only, because
465 * the set info->exp_gen is not changed anymore.
467 if (env->first_iter) {
468 foreach_valueset(info->exp_gen, value, expr, iter) {
469 ir_valueset_insert(info->antic_in, value, expr);
473 /* Select a successor to compute the disjoint of all Nodes
474 sets, it might be useful to select the block with the
475 smallest number of nodes. For simplicity we choose the
477 succ0 = get_Block_cfg_out(block, 0);
478 succ0_info = get_block_info(succ0);
479 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
480 /* we need the disjoint */
481 for (i = 1; i < n_succ; ++i) {
482 ir_node *succ = get_Block_cfg_out(block, i);
483 block_info *succ_info = get_block_info(succ);
484 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
488 /* we found a value that is common in all Antic_in(succ(b)),
489 put it in Antic_in(b) if the value is NOT already represented. */
490 ir_valueset_insert(info->antic_in, value, expr);
496 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
497 * and all other sets */
499 dump_value_set(info->antic_in, "Antic_in", block);
500 if (size != ir_valueset_size(info->antic_in)) {
501 /* the Antic_in set has changed */
504 } /* compute_antic */
507 * Perform insertion of partially redundant values.
508 * For every Block node, do the following:
509 * 1. Propagate the NEW_SETS of the dominator into the current block.
510 * If the block has multiple predecessors,
511 * 2a. Iterate over the ANTIC expressions for the block to see if
512 * any of them are partially redundant.
513 * 2b. If so, insert them into the necessary predecessors to make
514 * the expression fully redundant.
515 * 2c. Insert a new Phi merging the values of the predecessors.
516 * 2d. Insert the new Phi, and the new expressions, into the
519 static void insert_nodes(ir_node *block, void *ctx)
522 ir_node *value, *expr, *idom, *first_s, *worklist;
523 block_info *curr_info, *idom_info;
524 int pos, arity = get_irn_intra_arity(block);
525 int all_same, by_some, updated;
526 ir_valueset_iterator_t iter;
528 /* ensure that even the start block has a new_set */
529 curr_info = get_block_info(block);
530 if (curr_info->new_set)
531 ir_valueset_del(curr_info->new_set);
532 curr_info->new_set = ir_valueset_new(16);
534 if (block == env->start_block)
537 idom = get_Block_idom(block);
538 idom_info = get_block_info(idom);
540 /* update the new_sets */
542 dump_value_set(idom_info->new_set, "[New Set]", idom);
543 foreach_valueset(idom_info->new_set, value, expr, iter) {
544 ir_valueset_insert(curr_info->new_set, value, expr);
545 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
548 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
554 /* convert the set into a list. This allows the removal of
555 * elements from the set */
557 foreach_valueset(curr_info->antic_in, value, expr, iter) {
560 /* If the value was already computed in the dominator, then
561 it is totally redundant. Hence we have nothing to insert. */
562 if (ir_valueset_lookup(idom_info->avail_out, value)) {
563 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
572 /* for all predecessor blocks */
573 for (pos = 0; pos < arity; ++pos) {
574 block_info *pred_info;
575 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
576 ir_node *e_prime, *v_prime, *e_dprime;
578 /* ignore bad blocks. */
579 if (is_Bad(pred_blk))
582 e_prime = phi_translate(expr, block, pos, env);
583 v_prime = lookup(e_prime);
587 pred_info = get_block_info(pred_blk);
588 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
590 if (e_dprime == NULL) {
591 pred_info->avail = e_prime;
592 pred_info->not_found = 1;
595 pred_info->avail = e_dprime;
596 pred_info->not_found = 0;
597 mode = get_irn_mode(e_dprime);
602 else if (first_s != e_dprime)
605 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
609 /* If it's not the same value already existing along every predecessor, and
610 it's defined by some predecessor, it is partially redundant. */
611 if (! all_same && by_some) {
614 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
616 in = xmalloc(arity * sizeof(*in));
617 /* for all predecessor blocks */
618 for (pos = 0; pos < arity; ++pos) {
619 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
620 block_info *pred_info = get_block_info(pred_blk);
622 /* ignore bad blocks. */
623 if (is_Bad(pred_blk)) {
628 /* ignore blocks that already have the expression */
629 if (pred_info->not_found) {
630 ir_node *e_prime = pred_info->avail;
632 if (!is_Phi(e_prime)) {
633 mode = get_irn_mode(e_prime);
635 get_irn_dbg_info(e_prime),
636 current_ir_graph, pred_blk,
639 get_irn_arity(e_prime),
640 get_irn_in(e_prime) + 1);
641 copy_node_attr(e_prime, nn);
643 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
644 ir_valueset_insert(pred_info->avail_out, add(nn, lookup(expr)), nn);
645 pred_info->avail = nn;
648 in[pos] = pred_info->avail;
650 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
651 value = add(phi, lookup(expr));
652 ir_valueset_replace(curr_info->avail_out, value, phi);
653 ir_valueset_insert(curr_info->new_set, value, phi);
655 DB((dbg, LEVEL_2, "New %+F for redundant %+F created\n", phi, expr));
658 } /* node_set_foreach */
662 * Walker, change nodes by its value if different
664 static void eliminate(ir_node *irn, void *ctx) {
665 if (is_no_Block(irn)) {
666 ir_node *block = get_nodes_block(irn);
667 block_info *bl = get_block_info(block);
668 ir_node *value = lookup(irn);
671 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
673 if (expr != NULL && expr != irn) {
674 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", irn, expr));
682 * Argh: Endless loops cause problems, because the
683 * insert algorithm did not terminate. We get translated nodes that
684 * references the origin. These nodes are translated again and again...
686 * The current fix is to use post-dominance. This simple ignores
687 * endless loops, ie we cannot optimize them.
689 void do_gvn_pre(ir_graph *irg)
693 optimization_state_t state;
695 unsigned antic_iter, insert_iter;
697 /* register a debug mask */
698 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
699 firm_dbg_set_mask(dbg, SET_LEVEL_2);
701 /* edges will crash if enabled due to our allocate on other obstack trick */
702 edges_deactivate(irg);
704 value_table = new_identities();
705 ir_nodemap_init(&value_map);
709 a_env.trans_set = value_table;
711 a_env.start_block = get_irg_start_block(irg);
712 a_env.end_block = get_irg_end_block(irg);
715 /* Move Proj's into the same block as their args,
716 else we would assign the result to wrong blocks */
717 normalize_proj_nodes(irg);
719 /* critical edges MUST be removed */
720 remove_critical_cf_edges(irg);
722 /* we need dominator for Antic_out calculation */
724 assure_postdoms(irg);
725 /* we get all nodes of a block by following outs */
726 assure_irg_outs(irg);
729 * Switch on GCSE. We need it to correctly compute
730 * the leader of a node by hashing.
732 save_optimization_state(&state);
733 set_opt_global_cse(1);
735 DB((dbg, LEVEL_1, "Doing GVN-PRE for %e\n", get_irg_entity(irg)));
737 /* allocate block info for all blocks */
738 irg_walk_blkwise_graph(irg, NULL, topo_walker, &a_env);
740 /* compute the available value sets for all blocks */
741 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
743 /* compute the anticipated value sets for all blocks */
745 a_env.first_iter = 1;
747 /* we use the visited flag to mark non-clean nodes */
748 inc_irg_visited(irg);
750 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
752 //irg_block_walk_graph(irg, compute_antic, NULL, &a_env);
753 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
754 a_env.first_iter = 0;
755 DB((dbg, LEVEL_1, "------------------------\n"));
756 } while (a_env.changes != 0);
758 /* compute redundant expressions */
761 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
763 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
764 DB((dbg, LEVEL_1, "------------------------\n"));
765 } while (a_env.changes != 0);
767 /* last step: eliminate nodes */
768 irg_walk_graph(irg, NULL, eliminate, &a_env);
770 dump_ir_block_graph(irg, "-gvn");
772 /* clean up: delete all sets */
773 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
774 ir_valueset_del(bl_info->exp_gen);
775 ir_valueset_del(bl_info->avail_out);
776 ir_valueset_del(bl_info->antic_in);
777 if (bl_info->new_set)
778 ir_valueset_del(bl_info->new_set);
780 del_identities(a_env.trans_set);
781 del_identities(value_table);
782 ir_nodemap_destroy(&value_map);
783 obstack_free(&obst, NULL);
786 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
787 set_irg_pinned(irg, op_pin_state_pinned);
788 restore_optimization_state(&state);
791 set_irg_outs_inconsistent(irg);
792 set_irg_loopinfo_inconsistent(irg);
795 dump_ir_block_graph(irg, "-gvn");