3 * File name: ir/opt/gvn_pre.c
4 * Purpose: Global Value Numbering Partial Redundancy Elimination
5 * (VanDrunen Hosking 2004)
6 * Author: Michael Beck, Rubino Geiss
9 * Copyright: (c) 1998-2006 Universität Karlsruhe
10 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
19 #include "irgraph_t.h"
33 /** Additional info we need for every block. */
34 typedef struct block_info {
35 pset *nodes; /**< The set of nodes per block. */
36 pset *avail_out; /**< The Avail_out set for a block. */
37 pset *antic_in; /**< The Antic_in set for a block. */
38 pset *new_set; /**< The set of all new values for a block. */
39 ir_node *avail; /**< The get_map(avail, block) result. */
40 int not_found; /**< Non-zero, if avail was not found in this block. */
41 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
45 * A pair of nodes that must be exchanged.
46 * We must defer the exchange because our hash-sets cannot
47 * find an already replace node else.
49 typedef struct elim_pair {
50 ir_node *old_node; /**< The old node that will be replaced. */
51 ir_node *new_node; /**< The new node. */
52 struct elim_pair *next; /**< Links all entries in a list. */
55 /** The environment for the GVN-PRE algorithm */
56 typedef struct pre_env {
57 struct obstack *obst; /**< The obstack to allocate on. */
58 pset *trans_set; /**< The set of all translated values. */
59 ir_node *start_block; /**< The start block of the current graph. */
60 ir_node *end_block; /**< The end block of the current graph */
61 block_info *list; /**< Links all block info entires for easier recovery. */
62 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
63 int changes; /**< Non-zero, if calculation of Antic_in has changed. */
66 /** The debug module handle. */
67 static firm_dbg_module_t *dbg;
70 * Returns non-zero if a node is movable.
72 static int is_nice_value(ir_node *n) {
77 mode = get_irn_mode(n);
79 * FIXME: For now, we cannot handle Div/even if it's movable.
80 * That should be fixed.
82 if (!mode_is_data(mode))
84 if (is_irn_constlike(n))
86 return (get_irn_pinned(n) != op_pin_state_pinned);
89 #define pset_foreach(v, s) for ((v) = pset_first(s); (v); (v) = pset_next(s))
95 static void dump_set(pset *set, char *txt, ir_node *block)
100 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
102 pset_foreach(n, set) {
104 DB((dbg, LEVEL_2, "\n"));
105 DB((dbg, LEVEL_2, " %+F,", n));
108 DB((dbg, LEVEL_2, "\n}\n"));
112 #define dump_set(set, txt, block)
113 #endif /* DEBUG_libfirm */
117 * Return the block info of a block
119 static block_info *get_block_info(ir_node *block) {
120 return get_irn_link(block);
123 #define new_value_set() new_pset(identities_cmp, 8)
124 #define del_value_set(set) del_pset(set)
127 * Add a value to a value set
129 static ir_node *value_add(pset *value_set, ir_node *n)
131 return identify_remember(value_set, n);
135 * Lookup a value in a value set
137 static ir_node *lookup(pset *value_set, ir_node *n)
139 return pset_find(value_set, n, ir_node_hash(n));
142 /** computes dst = dst \/ src for value sets */
143 static void value_union(pset *dst, pset *src)
146 pset_foreach(entry, src)
147 value_add(dst, entry);
151 * Computes Avail_out(block):
153 * Avail_in(block) = Avail_out(dom(block))
154 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
157 * This function must be called in the top-down dominance order:
158 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
160 static void compute_avail_top_down(ir_node *block, void *ctx)
163 block_info *dom_info;
164 block_info *info = get_block_info(block);
167 /* we don't need the end block Avail */
168 if (block == env->end_block)
172 * First add all nodes from the dominator.
173 * This must be done to ensure that Antic_out contains the leader
174 * for every node. The root has no dominator.
176 if (block != env->start_block) {
177 dom_blk = get_Block_idom(block);
178 assert(is_Block(dom_blk));
180 dom_info = get_block_info(dom_blk);
183 value_union(info->avail_out, dom_info->avail_out);
185 value_union(info->avail_out, info->nodes);
187 dump_set(info->avail_out, "Avail_out", block);
191 * Returns the Phi-leader if one exists, else NULL.
193 static ir_node *has_Phi_leader(ir_node *n)
195 ir_node *l = get_irn_link(n);
202 } /* has_Phi_leader */
205 * Returns the Phi-leader if one exists, else n.
207 static ir_node *get_Phi_leader(ir_node *n)
209 ir_node *l = get_irn_link(n);
216 } /* get_Phi_leader */
219 * Get the leader of an expression.
221 static ir_node *find_leader(pset *value_set, ir_node *n)
223 ir_node *l = has_Phi_leader(n);
226 l = lookup(value_set, n);
227 return l ? get_Phi_leader(l) : l;
231 * Implements phi_translate.
233 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, pre_env *env)
238 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
239 block_info *pred_info = get_block_info(pred_block);
242 if (get_nodes_block(node) == block) {
243 ir_node *leader, *pred;
244 pred = get_Phi_pred(node, pos);
245 leader = find_leader(pred_info->avail_out, pred);
246 assert(!leader || is_Phi(pred) || is_nice_value(pred));
247 node = leader != NULL ? leader : pred;
252 arity = get_irn_intra_arity(node);
254 /* check if the node has at least one Phi predecessor */
255 for (i = 0; i < arity; ++i) {
256 ir_node *pred = get_irn_intra_n(node, i);
257 ir_node *local_bl = get_irn_intra_n(pred, -1);
258 ir_node *leader = find_leader(get_block_info(local_bl)->avail_out, pred);
260 assert(!leader || is_Phi(pred) || is_nice_value(pred));
261 leader = leader != NULL ? leader : pred;
262 if (is_Phi(leader) && get_nodes_block(leader) == block)
266 /* no Phi in the predecessors */
270 /* Create a copy of the node in the pos'th predecessor block.
271 Use our environmental obstack, as these nodes are always
273 old = current_ir_graph->obst;
274 current_ir_graph->obst = env->obst;
276 get_irn_dbg_info(node),
283 /* We need the attribute copy here, because the Hash value of a
284 node might depend on that. */
285 copy_node_attr(node, nn);
287 set_irn_n(nn, -1, get_irn_intra_n(node, -1));
288 for (i = 0; i < arity; ++i) {
289 ir_node *pred = get_irn_intra_n(node, i);
290 ir_node *local_bl = get_irn_intra_n(pred, -1);
291 ir_node *leader = find_leader(get_block_info(local_bl)->avail_out, pred);
293 leader = leader != NULL ? leader : pred;
294 if (is_Phi(leader) && get_nodes_block(leader) == block)
295 set_irn_n(nn, i, get_Phi_pred(leader, pos));
297 set_irn_n(nn, i, leader);
299 set_irn_link(nn, NULL);
301 res = value_add(env->trans_set, nn);
302 current_ir_graph->obst = old;
305 obstack_free(env->obst, nn);
307 DB((dbg, LEVEL_2, "Translate %+F into %+F\n", node, res));
310 } /* phi_translate */
312 static int _is_clean(ir_node *n, ir_node *block)
316 if (get_nodes_block(n) != block)
324 if (! is_nice_value(n))
326 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
327 ir_node *pred = get_irn_n(n, i);
328 if (! _is_clean(pred, block))
337 static int is_clean(ir_node *n)
339 int res = _is_clean(n, get_nodes_block(n));
344 * computes Antic_in(block):
346 static void compute_antic(ir_node *block, void *ctx)
349 block_info *succ_info;
350 block_info *info = get_block_info(block);
354 /* no need for computations in start block */
355 if (block == env->start_block)
358 size = pset_count(info->antic_in);
360 /* the root has no dominator */
361 if (block != env->end_block) {
362 int n_succ = get_Block_n_cfg_outs(block);
367 pset *nodes = new_value_set();
369 pset_foreach(node, info->nodes) {
371 value_add(nodes, node);
374 /* find blocks position in succ's block predecessors */
375 succ = get_Block_cfg_out(block, 0);
376 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
377 if (get_Block_cfgpred_block(succ, i) == block) {
384 succ_info = get_block_info(succ);
385 for (node = pset_first(succ_info->antic_in);
387 node = pset_next(succ_info->antic_in)) {
388 ir_node *trans = phi_translate(node, succ, pos, env);
391 value_add(nodes, trans);
393 /* add all predecessors of node */
394 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
395 ir_node *pred = get_irn_n(node, i);
396 ir_node *trans = phi_translate(pred, succ, pos, env);
399 value_add(nodes, trans);
402 /* this step calculates Antic_in(b) = Antic_out(b) \/ Nodes(b) */
403 value_union(info->antic_in, nodes);
404 del_value_set(nodes);
408 block_info *succ0_info;
413 /* Select a successor to compute the disjoint of all Nodes
414 sets, it might be useful to select the block with the
415 smallest number of nodes. For simplicity we choose the
417 succ0 = get_Block_cfg_out(block, 0);
418 succ0_info = get_block_info(succ0);
419 for (n = pset_first(succ0_info->antic_in);
421 n = pset_next(succ0_info->antic_in)) {
422 /* we need the disjoint */
423 for (i = 1; i < n_succ; ++i) {
424 ir_node *succ = get_Block_cfg_out(block, i);
425 block_info *succ_info = get_block_info(succ);
426 if (lookup(succ_info->antic_in, n) == NULL)
430 /* we found a node that is common in all Antic_in(succ(b)),
431 put it in Antic_in(b) */
432 value_add(info->antic_in, n);
435 /* this step calculates Antic_in(b) = Antic_out(b) \/ Nodes(b) */
436 pset_foreach(n, info->nodes) {
438 value_add(info->antic_in, n);
443 dump_set(info->antic_in, "Antic_in", block);
444 if (size != pset_count(info->antic_in)) {
445 /* the Antic_in set has changed */
448 } /* compute_antic */
451 * allocate a block info
453 static void alloc_blk_info(ir_node *block, void *ctx)
457 block_info *info = obstack_alloc(env->obst, sizeof(block_info));
459 set_irn_link(block, info);
460 info->nodes = new_value_set();
461 info->antic_in = new_value_set();
462 info->avail_out = new_value_set();
465 info->new_set = NULL;
466 info->next = env->list;
469 /* fill the nodes set, we will need it later */
470 for (i = get_irn_n_outs(block) - 1; i >= 0; --i) {
471 ir_node *n = get_irn_out(block, i);
473 /* clear the link field here, we need it later */
474 set_irn_link(n, NULL);
476 /* we cannot optimize pinned nodes, so do not remember them */
477 if (is_nice_value(n))
478 value_add(info->nodes, n);
479 else if (is_Phi(n) && get_irn_mode(n) != mode_M) {
481 * Phis are "temporaries" and must be handled special:
482 * They are avail, but are not in Antic_in
484 value_add(info->avail_out, n);
490 * Compare two nodes for equal operands.
492 static int operands_equal(ir_node *n1, ir_node *n2)
499 arity = get_irn_arity(n1);
500 assert(n1->op == n2->op && arity == get_irn_arity(n2));
501 for (i = 0; i < arity; ++i)
502 if (! operands_equal(get_irn_n(n1, i), get_irn_n(n2, i)))
508 * Replace a value in a set by an node computing the same
509 * value in a dominator block.
511 * @return non-zero if a replacement took place
513 static int value_replace(pset *set, ir_node *e)
515 ir_node *old = value_add(set, e);
518 /* e must dominate old here */
519 assert(block_dominates(get_nodes_block(e), get_nodes_block(old)));
521 pset_remove(set, old, ir_node_hash(old));
528 static ir_node *create_shadow(ir_node *n, ir_node *block)
530 ir_mode *mode = get_irn_mode(n);
534 current_ir_graph, block,
539 copy_node_attr(n, nn);
544 * Perform insertion of partially redundant values.
545 * For every Block node, do the following:
546 * 1. Propagate the NEW_SETS of the dominator into the current block.
547 * If the block has multiple predecessors,
548 * 2a. Iterate over the ANTIC expressions for the block to see if
549 * any of them are partially redundant.
550 * 2b. If so, insert them into the necessary predecessors to make
551 * the expression fully redundant.
552 * 2c. Insert a new Phi merging the values of the predecessors.
553 * 2d. Insert the new Phi, and the new expressions, into the
556 static void insert_nodes(ir_node *block, void *ctx)
559 ir_node *e, *idom, *first_s;
560 block_info *curr_info, *idom_info;
561 int pos, arity = get_irn_intra_arity(block);
562 int all_same, by_some, updated;
564 /* ensure that even the start block has a new_set */
565 curr_info = get_block_info(block);
566 if (curr_info->new_set)
567 del_value_set(curr_info->new_set);
568 curr_info->new_set = new_value_set();
570 if (block == env->start_block)
573 idom = get_Block_idom(block);
574 idom_info = get_block_info(idom);
576 /* update the new_sets */
578 dump_set(idom_info->new_set, "[New Set]", idom);
579 pset_foreach(e, idom_info->new_set) {
580 value_add(curr_info->new_set, e);
581 updated |= value_replace(curr_info->avail_out, e);
584 dump_set(curr_info->avail_out, "Updated [Avail_out]", block);
589 pset_foreach(e, curr_info->antic_in) {
592 * If we already have a leader for this node,
593 * it is totally redundant.
595 if (has_Phi_leader(e))
598 /* If the value was already computed in the dominator, then
599 it is totally redundant. Hence we have nothing to insert. */
600 if (lookup(idom_info->avail_out, e)) {
601 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
610 /* for all predecessor blocks */
611 for (pos = 0; pos < arity; ++pos) {
612 block_info *pred_info;
613 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
614 ir_node *e_prime, *v_prime, *e_dprime;
616 /* ignore bad blocks. */
617 if (is_Bad(pred_blk))
620 e_prime = phi_translate(e, block, pos, env);
623 pred_info = get_block_info(pred_blk);
624 e_dprime = find_leader(pred_info->avail_out, v_prime);
626 if (e_dprime == NULL) {
628 pred_info->avail = e_prime;
629 pred_info->not_found = 1;
632 mode = get_irn_mode(e_dprime);
634 pred_info->avail = e_dprime;
635 pred_info->not_found = 0;
639 else if (first_s != e_dprime)
642 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", e, block, e_dprime, pred_blk));
646 /* If it's not the same value already existing along every predecessor, and
647 it's defined by some predecessor, it is partially redundant. */
648 if (! all_same && by_some) {
650 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", e, block));
652 in = xmalloc(arity * sizeof(*in));
653 /* for all predecessor blocks */
654 for (pos = 0; pos < arity; ++pos) {
655 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
656 block_info *pred_info = get_block_info(pred_blk);
658 /* ignore bad blocks. */
659 if (is_Bad(pred_blk)) {
664 /* ignore blocks that already have the expression */
665 if (pred_info->not_found) {
666 ir_node *e_prime = pred_info->avail;
668 if (!is_Phi(e_prime)) {
669 mode = get_irn_mode(e_prime);
671 get_irn_dbg_info(e_prime),
672 current_ir_graph, pred_blk,
675 get_irn_arity(e_prime),
676 get_irn_in(e_prime) + 1);
677 copy_node_attr(e_prime, nn);
679 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
680 pred_info->avail = value_add(pred_info->avail_out, nn);
683 in[pos] = pred_info->avail;
685 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
687 value_add(curr_info->avail_out, phi);
688 value_add(curr_info->new_set, phi);
689 DB((dbg, LEVEL_2, "New %+F(%+F) for redundant %+F(%+F) created\n",
690 phi, get_nodes_block(phi), e, get_nodes_block((e))));
692 /* the good case: we really replace an instruction */
693 if (get_nodes_block(e) == block) {
694 set_irn_link(e, phi);
697 ir_node *shadow = create_shadow(e, block);
698 set_irn_link(shadow, phi);
699 value_add(curr_info->avail_out, shadow);
700 value_add(curr_info->new_set, shadow);
709 * Do the elimination step: collect all changes
710 * We cannot do the changes right here, as this would change
711 * the hash values of the nodes in the avail_out set!
713 static void collect_elim_pairs(ir_node *block, void *ctx)
716 block_info *curr_info = get_block_info(block);
719 dump_set(curr_info->nodes, "Updating nodes", block);
720 pset_foreach(v, curr_info->nodes) {
721 ir_node *l = find_leader(curr_info->avail_out, v);
725 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
729 p->next = env->pairs;
736 * Do all the recorded changes.
738 static void eliminate_nodes(elim_pair *pairs)
742 for (p = pairs; p != NULL; p = p->next) {
743 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
744 exchange(p->old_node, p->new_node);
748 void do_gvn_pre(ir_graph *irg)
752 optimization_state_t state;
756 /* register a debug mask */
757 dbg = firm_dbg_register("firm.opt.gvn_pre");
758 //firm_dbg_set_mask(dbg, SET_LEVEL_2);
762 a_env.trans_set = new_value_set();
764 a_env.start_block = get_irg_start_block(irg);
765 a_env.end_block = get_irg_end_block(irg);
768 /* Move Proj's into the same block as their args,
769 else we would assign the result to wrong blocks */
770 normalize_proj_nodes(irg);
772 /* critical edges MUST be removed */
773 remove_critical_cf_edges(irg);
775 /* we need dominator for Antic_out calculation */
776 if (get_irg_dom_state(irg) != dom_consistent)
778 /* we get all nodes of a block by following outs */
779 if (get_irg_outs_state(irg) != outs_consistent)
780 compute_irg_outs(irg);
783 * Switch on GCSE. We need it to correctly compute
784 * the leader of a node by hashing.
786 save_optimization_state(&state);
787 set_opt_global_cse(1);
789 DB((dbg, LEVEL_1, "Doing GVN-PRE for %e\n", get_irg_entity(irg)));
790 printf("Doing GVN-PRE for %s\n", get_entity_name(get_irg_entity(irg)));
792 /* allocate block info for all blocks */
793 irg_block_walk_graph(irg, NULL, alloc_blk_info, &a_env);
795 /* compute the available value sets for all blocks */
796 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
798 /* compute the anticipated value sets for all blocks */
799 inc_irg_visited(irg);
801 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++iter));
803 irg_block_walk_graph(irg, compute_antic, NULL, &a_env);
804 DB((dbg, LEVEL_1, "------------------------\n"));
805 } while (a_env.changes != 0);
807 /* compute redundant expressions */
810 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++iter));
812 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
813 DB((dbg, LEVEL_1, "------------------------\n"));
814 } while (a_env.changes != 0);
816 /* last step: eliminate nodes */
817 dom_tree_walk_irg(irg, collect_elim_pairs, NULL, &a_env);
818 eliminate_nodes(a_env.pairs);
820 restore_optimization_state(&state);
822 /* clean up: delete all sets */
823 for (p = a_env.list; p != NULL; p = p->next) {
825 del_value_set(p->antic_in);
827 del_value_set(p->avail_out);
829 del_value_set(p->nodes);
831 del_value_set(p->new_set);
833 del_value_set(a_env.trans_set);
834 obstack_free(&obst, NULL);
835 set_irg_pinned(irg, op_pin_state_pinned);
838 set_irg_outs_inconsistent(irg);
839 set_irg_loopinfo_inconsistent(irg);