2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
40 #include "irnodemap.h"
41 #include "irnodeset.h"
45 #include "irgraph_t.h"
49 /** Additional info we need for every block. */
50 typedef struct block_info {
51 ir_valueset_t *exp_gen; /**< The set of expression per block. */
52 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
53 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
54 ir_valueset_t *new_set; /**< The set of all new values for a block. */
55 ir_node *avail; /**< The get_map(avail, block) result. */
56 int not_found; /**< Non-zero, if avail was not found in this block. */
57 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
61 * A pair of nodes that must be exchanged.
62 * We must defer the exchange because our hash-sets cannot
63 * find an already replace node else.
65 typedef struct elim_pair {
66 ir_node *old_node; /**< The old node that will be replaced. */
67 ir_node *new_node; /**< The new node. */
68 struct elim_pair *next; /**< Links all entries in a list. */
71 /** The environment for the GVN-PRE algorithm */
72 typedef struct pre_env {
73 struct obstack *obst; /**< The obstack to allocate on. */
74 ir_node *start_block; /**< The start block of the current graph. */
75 ir_node *end_block; /**< The end block of the current graph */
76 block_info *list; /**< Links all block info entires for easier recovery. */
77 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
78 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
79 char first_iter; /**< non-zero for first iteration */
82 static pset *value_table;
83 static ir_nodemap_t value_map;
85 /** The debug module handle. */
86 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
88 /* ---------- Functions for Value sets ---------- */
90 /** computes dst = dst \/ src for value sets */
91 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
93 ir_valueset_iterator_t iter;
94 ir_node *value, *expr;
96 foreach_valueset(src, value, expr, iter) {
97 ir_valueset_insert(dst, value, expr);
102 /* ---------- Functions for Values ---------- */
105 * Add a node node representing the value value to the set.
107 static ir_node *add(ir_node *e, ir_node *v)
109 v = identify_remember(value_table, v);
110 ir_nodemap_insert(&value_map, e, v);
115 * Lookup a value in a value set.
117 static ir_node *lookup(ir_node *e)
119 ir_node *value = ir_nodemap_get(&value_map, e);
121 return identify_remember(value_table, value);
126 * Return the block info of a block
128 static block_info *get_block_info(ir_node *block) {
129 return get_irn_link(block);
133 * allocate a block info
135 static void alloc_blk_info(ir_node *block, pre_env *env) {
136 block_info *info = obstack_alloc(env->obst, sizeof(*info));
138 set_irn_link(block, info);
139 info->exp_gen = ir_valueset_new(16);
140 info->avail_out = ir_valueset_new(16);
141 info->antic_in = ir_valueset_new(16);
142 info->new_set = NULL;
145 info->next = env->list;
150 * Returns non-zero if a node is movable and a possible candidate for PRE.
152 static int is_nice_value(ir_node *n) {
156 n = get_Proj_pred(n);
157 mode = get_irn_mode(n);
159 * FIXME: For now, we cannot handle Div/even if it's movable.
160 * That should be fixed.
162 if (!mode_is_data(mode))
164 return (get_irn_pinned(n) != op_pin_state_pinned);
165 } /* is_nice_value */
171 static void dump_value_set(ir_valueset_t *set, char *txt, ir_node *block) {
172 ir_valueset_iterator_t iter;
173 ir_node *value, *expr;
176 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
178 foreach_valueset(set, value, expr, iter) {
180 DB((dbg, LEVEL_2, "\n"));
182 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
184 DB((dbg, LEVEL_2, " %+F,", expr));
187 DB((dbg, LEVEL_2, "\n}\n"));
188 } /* dump_value_set */
191 #define dump_value_set(set, txt, block)
192 #endif /* DEBUG_libfirm */
195 * Topological walker. Allocates block info for every block and place nodes in topological
196 * order into the nodes set.
198 static void topo_walker(ir_node *irn, void *ctx) {
205 /* the topological walker ensures that blocks are visited before anything else */
206 alloc_blk_info(irn, env);
209 /* GVN step: remember the value */
210 value = add(irn, irn);
212 /* no need to put constants into the sets: they are always redundant */
213 if (! is_nice_value(irn) || is_irn_constlike(irn))
216 /* place this node into the set of possible nodes of its block */
217 block = get_nodes_block(irn);
218 info = get_block_info(block);
220 ir_valueset_insert(info->exp_gen, value, irn);
224 * Computes Avail_out(block):
226 * Avail_in(block) = Avail_out(dom(block))
227 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
230 * This function must be called in the top-down dominance order:
231 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
233 static void compute_avail_top_down(ir_node *block, void *ctx)
236 block_info *dom_info;
237 block_info *info = get_block_info(block);
240 /* we don't need the end block Avail */
241 if (block == env->end_block)
245 * First add all nodes from the dominator.
246 * This must be done to ensure that Antic_out contains the leader
247 * for every node. The root has no dominator.
249 if (block != env->start_block) {
250 dom_blk = get_Block_idom(block);
251 assert(is_Block(dom_blk));
253 dom_info = get_block_info(dom_blk);
256 value_union(info->avail_out, dom_info->avail_out);
258 value_union(info->avail_out, info->exp_gen);
260 dump_value_set(info->avail_out, "Avail_out", block);
264 * check if a node n is clean in block block.
266 static int _is_clean(ir_node *n, ir_node *block)
270 if (get_nodes_block(n) != block)
278 if (! is_nice_value(n))
280 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
281 ir_node *pred = get_irn_n(n, i);
282 if (! _is_clean(pred, block))
292 * check if a node n is clean.
294 static int is_clean(ir_node *n) {
295 int res = _is_clean(n, get_nodes_block(n));
300 * Implements phi_translate.
302 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, pre_env *env)
309 if (get_nodes_block(node) == block) {
310 /* a Phi inside our block */
311 return get_Phi_pred(node, pos);
313 /* already outside */
317 arity = get_irn_intra_arity(node);
319 /* check if the node has at least one Phi predecessor */
320 for (i = 0; i < arity; ++i) {
321 ir_node *pred = get_irn_intra_n(node, i);
322 ir_node *leader = lookup(pred);
324 leader = leader != NULL ? leader : pred;
325 if (is_Phi(leader) && get_nodes_block(pred) == block)
329 /* no Phi in the predecessors */
333 /* Create a copy of the node in the pos'th predecessor block.
334 Use our environmental obstack, as these nodes are always
336 old = current_ir_graph->obst;
337 current_ir_graph->obst = env->obst;
339 get_irn_dbg_info(node),
346 /* We need the attribute copy here, because the Hash value of a
347 node might depend on that. */
348 copy_node_attr(node, nn);
350 set_nodes_block(nn, get_nodes_block(node));
351 for (i = 0; i < arity; ++i) {
352 ir_node *pred = get_irn_intra_n(node, i);
353 ir_node *leader = lookup(pred);
355 leader = leader != NULL ? leader : pred;
356 if (is_Phi(leader) && get_irn_intra_n(pred, -1) == block)
357 set_irn_n(nn, i, get_Phi_pred(leader, pos));
359 set_irn_n(nn, i, leader);
361 current_ir_graph->obst = old;
363 } /* phi_translate */
366 * computes Antic_in(block):
368 static void compute_antic(ir_node *block, void *ctx) {
370 block_info *succ_info;
371 block_info *info = get_block_info(block);
372 ir_node *succ, *value, *expr;
374 ir_valueset_iterator_t iter;
376 /* no need for computations in start block */
377 if (block == env->start_block)
380 size = ir_valueset_size(info->antic_in);
382 /* the end block has no successor */
383 if (block != env->end_block) {
384 int n_succ = get_Block_n_cfg_outs(block);
389 /* find blocks position in succ's block predecessors */
390 succ = get_Block_cfg_out(block, 0);
391 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
392 if (get_Block_cfgpred_block(succ, i) == block) {
399 succ_info = get_block_info(succ);
400 /* translate into list: we cannot insert into a set we iterate
401 * and succ might be equal to block for endless loops */
402 foreach_valueset(succ_info->antic_in, value, expr, iter) {
403 ir_node *trans = phi_translate(expr, succ, pos, env);
406 ir_valueset_insert(info->antic_in, value, trans);
408 } else { /* n_succ > 1 */
410 block_info *succ0_info;
416 * This step puts all generated expression from the current
417 * current block into Antic_in.
418 * It is enough to do this in the first iteration only, because
419 * the set info->exp_gen is not changed anymore.
421 if (env->first_iter) {
422 foreach_valueset(info->exp_gen, value, expr, iter) {
423 ir_valueset_insert(info->antic_in, value, expr);
427 /* Select a successor to compute the disjoint of all Nodes
428 sets, it might be useful to select the block with the
429 smallest number of nodes. For simplicity we choose the
431 succ0 = get_Block_cfg_out(block, 0);
432 succ0_info = get_block_info(succ0);
433 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
434 /* we need the disjoint */
435 for (i = 1; i < n_succ; ++i) {
436 ir_node *succ = get_Block_cfg_out(block, i);
437 block_info *succ_info = get_block_info(succ);
438 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL)
442 /* we found a value that is common in all Antic_in(succ(b)),
443 put it in Antic_in(b) if the value is NOT already represented. */
444 ir_valueset_insert(info->antic_in, value, expr);
450 /* we do not need a clean here, because we ensure that only cleaned nodes are in exp_gen
451 * and all other sets */
453 dump_value_set(info->antic_in, "Antic_in", block);
454 if (size != ir_valueset_size(info->antic_in)) {
455 /* the Antic_in set has changed */
458 } /* compute_antic */
461 * Perform insertion of partially redundant values.
462 * For every Block node, do the following:
463 * 1. Propagate the NEW_SETS of the dominator into the current block.
464 * If the block has multiple predecessors,
465 * 2a. Iterate over the ANTIC expressions for the block to see if
466 * any of them are partially redundant.
467 * 2b. If so, insert them into the necessary predecessors to make
468 * the expression fully redundant.
469 * 2c. Insert a new Phi merging the values of the predecessors.
470 * 2d. Insert the new Phi, and the new expressions, into the
473 static void insert_nodes(ir_node *block, void *ctx)
476 ir_node *value, *expr, *idom, *first_s, *worklist;
477 block_info *curr_info, *idom_info;
478 int pos, arity = get_irn_intra_arity(block);
479 int all_same, by_some, updated;
480 ir_valueset_iterator_t iter;
482 /* ensure that even the start block has a new_set */
483 curr_info = get_block_info(block);
484 if (curr_info->new_set)
485 ir_valueset_del(curr_info->new_set);
486 curr_info->new_set = ir_valueset_new(16);
488 if (block == env->start_block)
491 idom = get_Block_idom(block);
492 idom_info = get_block_info(idom);
494 /* update the new_sets */
496 dump_value_set(idom_info->new_set, "[New Set]", idom);
497 foreach_valueset(idom_info->new_set, value, expr, iter) {
498 ir_valueset_insert(curr_info->new_set, value, expr);
499 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
502 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
508 /* convert the set into a list. This allows the removal of
509 * elements from the set */
511 foreach_valueset(curr_info->antic_in, value, expr, iter) {
514 /* If the value was already computed in the dominator, then
515 it is totally redundant. Hence we have nothing to insert. */
516 if (ir_valueset_lookup(idom_info->avail_out, value)) {
517 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
526 /* for all predecessor blocks */
527 for (pos = 0; pos < arity; ++pos) {
528 block_info *pred_info;
529 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
530 ir_node *e_prime, *v_prime, *e_dprime;
532 /* ignore bad blocks. */
533 if (is_Bad(pred_blk))
536 e_prime = phi_translate(expr, block, pos, env);
537 v_prime = lookup(e_prime);
541 pred_info = get_block_info(pred_blk);
542 e_dprime = ir_valueset_lookup(pred_info->avail_out, v_prime);
544 if (e_dprime == NULL) {
545 pred_info->avail = e_prime;
546 pred_info->not_found = 1;
549 pred_info->avail = e_dprime;
550 pred_info->not_found = 0;
551 mode = get_irn_mode(e_dprime);
556 else if (first_s != e_dprime)
559 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, e_dprime, pred_blk));
563 /* If it's not the same value already existing along every predecessor, and
564 it's defined by some predecessor, it is partially redundant. */
565 if (! all_same && by_some) {
568 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
570 in = xmalloc(arity * sizeof(*in));
571 /* for all predecessor blocks */
572 for (pos = 0; pos < arity; ++pos) {
573 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
574 block_info *pred_info = get_block_info(pred_blk);
576 /* ignore bad blocks. */
577 if (is_Bad(pred_blk)) {
582 /* ignore blocks that already have the expression */
583 if (pred_info->not_found) {
584 ir_node *e_prime = pred_info->avail;
586 if (!is_Phi(e_prime)) {
587 mode = get_irn_mode(e_prime);
589 get_irn_dbg_info(e_prime),
590 current_ir_graph, pred_blk,
593 get_irn_arity(e_prime),
594 get_irn_in(e_prime) + 1);
595 copy_node_attr(e_prime, nn);
597 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
598 ir_valueset_insert(pred_info->avail_out, add(nn, lookup(expr)), nn);
599 pred_info->avail = nn;
602 in[pos] = pred_info->avail;
604 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
605 value = add(phi, lookup(expr));
606 ir_valueset_replace(curr_info->avail_out, value, phi);
607 ir_valueset_insert(curr_info->new_set, value, phi);
609 DB((dbg, LEVEL_2, "New %+F for redundant %+F created\n", phi, expr));
612 } /* node_set_foreach */
616 * Walker, change nodes by its value if different.
618 * We cannot do the changes right here, as this would change
619 * the hash values of the nodes in the avail_out set!
621 static void eliminate(ir_node *irn, void *ctx) {
624 if (is_no_Block(irn)) {
625 ir_node *block = get_nodes_block(irn);
626 block_info *bl = get_block_info(block);
627 ir_node *value = lookup(irn);
630 ir_node *expr = ir_valueset_lookup(bl->avail_out, value);
632 if (expr != NULL && expr != irn) {
633 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
637 p->next = env->pairs;
645 * Do all the recorded changes and optimize
646 * newly created Phi's.
648 static void eliminate_nodes(elim_pair *pairs) {
651 for (p = pairs; p != NULL; p = p->next) {
652 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
654 * PRE tends to create Phi(self, self, ... , x, self, self, ...)
655 * which we can optimize here
657 if (is_Phi(p->new_node)) {
661 for (i = get_irn_intra_arity(p->new_node) - 1; i >= 0; --i) {
662 ir_node *pred = get_irn_n(p->new_node, i);
664 if (pred != p->old_node) {
675 exchange(p->old_node, p->new_node);
677 } /* eliminate_nodes */
680 * Argh: Endless loops cause problems, because the
681 * insert algorithm did not terminate. We get translated nodes that
682 * references the origin. These nodes are translated again and again...
684 * The current fix is to use post-dominance. This simple ignores
685 * endless loops, ie we cannot optimize them.
687 void do_gvn_pre(ir_graph *irg)
691 optimization_state_t state;
693 unsigned antic_iter, insert_iter;
695 /* register a debug mask */
696 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
698 /* edges will crash if enabled due to our allocate on other obstack trick */
699 edges_deactivate(irg);
701 value_table = new_identities();
702 ir_nodemap_init(&value_map);
707 a_env.start_block = get_irg_start_block(irg);
708 a_env.end_block = get_irg_end_block(irg);
711 /* Move Proj's into the same block as their args,
712 else we would assign the result to wrong blocks */
713 normalize_proj_nodes(irg);
715 /* critical edges MUST be removed */
716 remove_critical_cf_edges(irg);
718 /* we need dominator for Antic_out calculation */
720 assure_postdoms(irg);
721 /* we get all nodes of a block by following outs */
722 assure_irg_outs(irg);
725 * Switch on GCSE. We need it to correctly compute
726 * the leader of a node by hashing.
728 save_optimization_state(&state);
729 set_opt_global_cse(1);
731 DB((dbg, LEVEL_1, "Doing GVN-PRE for %e\n", get_irg_entity(irg)));
733 /* allocate block info for all blocks */
734 irg_walk_blkwise_graph(irg, NULL, topo_walker, &a_env);
736 /* compute the available value sets for all blocks */
737 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
739 /* compute the anticipated value sets for all blocks */
741 a_env.first_iter = 1;
743 /* we use the visited flag to mark non-clean nodes */
744 inc_irg_visited(irg);
746 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++antic_iter));
748 //irg_block_walk_graph(irg, compute_antic, NULL, &a_env);
749 postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
750 a_env.first_iter = 0;
751 DB((dbg, LEVEL_1, "------------------------\n"));
752 } while (a_env.changes != 0);
754 /* compute redundant expressions */
757 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++insert_iter));
759 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
760 DB((dbg, LEVEL_1, "------------------------\n"));
761 } while (a_env.changes != 0);
763 /* last step: eliminate nodes */
764 irg_walk_graph(irg, NULL, eliminate, &a_env);
765 eliminate_nodes(a_env.pairs);
767 /* clean up: delete all sets */
768 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
769 ir_valueset_del(bl_info->exp_gen);
770 ir_valueset_del(bl_info->avail_out);
771 ir_valueset_del(bl_info->antic_in);
772 if (bl_info->new_set)
773 ir_valueset_del(bl_info->new_set);
775 del_identities(value_table);
776 ir_nodemap_destroy(&value_map);
777 obstack_free(&obst, NULL);
780 /* pin the graph again: This is needed due to the use of set_opt_global_cse(1) */
781 set_irg_pinned(irg, op_pin_state_pinned);
782 restore_optimization_state(&state);
785 set_irg_outs_inconsistent(irg);
786 set_irg_loopinfo_inconsistent(irg);