3 * File name: ir/opt/gvn_pre.c
4 * Purpose: Global Value Numbering Partial Redundancy Elimination
5 * (VanDrunen Hosking 2004)
6 * Author: Michael Beck, Rubino Geiss
9 * Copyright: (c) 1998-2006 Universität Karlsruhe
10 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
19 #include "irgraph_t.h"
34 typedef struct block_info {
35 pset *nodes; /**< the set of nodes per block */
36 pset *avail_out; /**< the Avail_out set for a block */
37 pset *antic_in; /**< the Antic_in set for a block */
38 pset *new_set; /**< the set of all new values for a block */
39 ir_node *avail; /**< the get_map(avail, block) result */
40 int not_found; /**< non-zero, if avail was not found in this block */
41 struct block_info *next;
44 typedef struct elim_pair {
47 struct elim_pair *next;
50 typedef struct pre_env {
51 struct obstack *obst; /**< the obstack to allocate on */
52 pset *trans_set; /**< the set of all translated values */
53 ir_node *start_block; /**< the start block of the current graph */
54 ir_node *end_block; /**< the end block of the current graph */
55 block_info *list; /**< links all block info entires for easier recovery */
56 elim_pair *pairs; /**< a list of node pairs that mut be eliminated */
57 int changes; /**< non-zero, if calculation of Antic_in has changed */
60 /** The debug module handle. */
61 static firm_dbg_module_t *dbg;
64 * returns non-zero if a node is movable.
66 static int is_nice_value(ir_node *n) {
67 ir_mode *mode = get_irn_mode(n);
69 if (!mode_is_data(mode))
71 if (is_irn_constlike(n))
73 return (get_irn_pinned(n) != op_pin_state_pinned);
76 #define pset_foreach(v, s) for ((v) = pset_first(s); (v); (v) = pset_next(s))
78 typedef unsigned (*HASH_FUNC)(void *);
82 * Dump the Avail or Antic sets
84 static void dump_set(pset *set, char *txt, ir_node *block)
89 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
91 pset_foreach(n, set) {
93 DB((dbg, LEVEL_2, "\n"));
94 DB((dbg, LEVEL_2, " %+F,", n));
97 DB((dbg, LEVEL_2, "\n}\n"));
101 #define dump_set(set, txt, block)
102 #endif /* DEBUG_libfirm */
106 * Return the block info of a block
108 static block_info *get_block_info(ir_node *block) {
109 return get_irn_link(block);
112 #define new_value_set() new_pset(identities_cmp, 8)
113 #define del_value_set(set) del_pset(set)
116 * Add a value to a value set
118 static ir_node *value_add(pset *value_set, ir_node *n)
120 return identify_remember(value_set, n);
124 * Lookup a value in a value set
126 static ir_node *lookup(pset *value_set, ir_node *n)
128 return pset_find(value_set, n, ir_node_hash(n));
131 /** computes dst = dst \/ src for value sets */
132 static void value_union(pset *dst, pset *src)
135 pset_foreach(entry, src)
136 value_add(dst, entry);
141 * computes Avail_out(block):
143 * Avail_in(block) = Avail_out(dom(block))
144 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
147 * This function must be called in the top-down dominance order:
148 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
150 static void compute_avail_top_down(ir_node *block, void *ctx)
153 block_info *dom_info;
154 block_info *info = get_block_info(block);
157 /* we don't need the end block Avail */
158 if (block == env->end_block)
162 * First add all nodes from the dominator.
163 * This must be done to ensure that Antic_out contains the leader
164 * for every node. The root has no dominator.
166 if (block != env->start_block) {
167 dom_blk = get_Block_idom(block);
168 assert(is_Block(dom_blk));
170 dom_info = get_block_info(dom_blk);
173 value_union(info->avail_out, dom_info->avail_out);
175 value_union(info->avail_out, info->nodes);
177 dump_set(info->avail_out, "Avail_out", block);
181 * Returns the Phi-leader if one exists, else NULL.
183 static ir_node *has_Phi_leader(ir_node *n)
185 ir_node *l = get_irn_link(n);
192 } /* has_Phi_leader */
195 * Get the leader of an expression.
197 static ir_node *find_leader(pset *value_set, ir_node *n)
199 ir_node *l = has_Phi_leader(n);
202 return lookup(value_set, n);
206 * Implements phi_translate
208 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, pre_env *env)
213 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
214 block_info *pred_info = get_block_info(pred_block);
217 if (get_nodes_block(node) == block) {
218 ir_node *leader, *pred;
219 pred = get_Phi_pred(node, pos);
220 leader = find_leader(pred_info->avail_out, pred);
221 assert(leader || ! is_nice_value(pred));
222 node = leader != NULL ? leader : pred;
227 arity = get_irn_intra_arity(node);
229 /* check if the node has at least one Phi predecessor */
230 for (i = 0; i < arity; ++i) {
231 ir_node *pred = get_irn_intra_n(node, i);
232 ir_node *local_bl = get_irn_intra_n(pred, -1);
233 ir_node *leader = find_leader(get_block_info(local_bl)->avail_out, pred);
235 assert(leader || ! is_nice_value(pred));
236 leader = leader != NULL ? leader : pred;
237 if (is_Phi(leader) && get_nodes_block(leader) == block)
241 /* no Phi in the predecessors */
245 /* Create a copy of the node in the pos'th predecessor block.
246 Use our environmental obstack, as these nodes are always
248 old = current_ir_graph->obst;
249 current_ir_graph->obst = env->obst;
251 get_irn_dbg_info(node),
258 /* We need the attribute copy here, because the Hash value of a
259 node might depend on that. */
260 copy_node_attr(node, nn);
262 set_irn_n(nn, -1, get_irn_intra_n(node, -1));
263 for (i = 0; i < arity; ++i) {
264 ir_node *pred = get_irn_intra_n(node, i);
265 ir_node *local_bl = get_irn_intra_n(pred, -1);
266 ir_node *leader = find_leader(get_block_info(local_bl)->avail_out, pred);
268 leader = leader != NULL ? leader : pred;
269 if (is_Phi(leader) && get_nodes_block(leader) == block)
270 set_irn_n(nn, i, get_Phi_pred(leader, pos));
272 set_irn_n(nn, i, leader);
274 set_irn_link(nn, NULL);
276 res = value_add(env->trans_set, nn);
277 current_ir_graph->obst = old;
280 obstack_free(env->obst, nn);
282 DB((dbg, LEVEL_2, "Translate %+F into %+F\n", node, res));
285 } /* phi_translate */
288 * computes Antic_in(block):
290 static void compute_antic(ir_node *block, void *ctx)
293 block_info *succ_info;
294 block_info *info = get_block_info(block);
298 /* no need for computations in start block */
299 if (block == env->start_block)
302 size = pset_count(info->antic_in);
304 /* the root has no dominator */
305 if (block != env->end_block) {
306 int n_succ = get_Block_n_cfg_outs(block);
311 pset *nodes = new_value_set();
313 value_union(nodes, info->nodes);
315 /* find blocks position in succ's block predecessors */
316 succ = get_Block_cfg_out(block, 0);
317 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
318 if (get_Block_cfgpred_block(succ, i) == block) {
325 succ_info = get_block_info(succ);
326 for (node = pset_first(succ_info->antic_in);
328 node = pset_next(succ_info->antic_in)) {
329 ir_node *trans = phi_translate(node, succ, pos, env);
331 value_add(nodes, trans);
333 /* add all predecessors of node */
334 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
335 ir_node *pred = get_irn_n(node, i);
336 ir_node *trans = phi_translate(pred, succ, pos, env);
338 if (is_nice_value(trans))
339 value_add(nodes, trans);
342 /* this step calculates Antic_in(b) = Antic_out(b) \/ Nodes(b) */
343 value_union(info->antic_in, nodes);
344 del_value_set(nodes);
348 block_info *succ0_info;
353 /* Select a successor to compute the disjoint of all Nodes
354 sets, it might be useful to select the block with the
355 smallest number of nodes. For simplicity we choose the
357 succ0 = get_Block_cfg_out(block, 0);
358 succ0_info = get_block_info(succ0);
359 for (n = pset_first(succ0_info->antic_in);
361 n = pset_next(succ0_info->antic_in)) {
362 /* we need the disjoint */
363 for (i = 1; i < n_succ; ++i) {
364 ir_node *succ = get_Block_cfg_out(block, i);
365 block_info *succ_info = get_block_info(succ);
366 if (lookup(succ_info->antic_in, n) == NULL)
370 /* we found a node that is common in all Antic_in(succ(b)),
371 put it in Antic_in(b) */
372 value_add(info->antic_in, n);
375 /* this step calculates Antic_in(b) = Antic_out(b) \/ Nodes(b) */
376 value_union(info->antic_in, info->nodes);
380 if (size != pset_count(info->antic_in)) {
381 /* the Antic_in set has changed */
383 dump_set(info->antic_in, "Antic_in", block);
385 } /* compute_antic */
388 * allocate a block info
390 static void alloc_blk_info(ir_node *block, void *ctx)
394 block_info *info = obstack_alloc(env->obst, sizeof(block_info));
396 set_irn_link(block, info);
397 info->nodes = new_value_set();
398 info->antic_in = new_value_set();
399 info->avail_out = new_value_set();
402 info->new_set = NULL;
403 info->next = env->list;
404 env->list = info->next;
406 /* fill the nodes set, we will need it later */
407 for (i = get_irn_n_outs(block) - 1; i >= 0; --i) {
408 ir_node *n = get_irn_out(block, i);
410 /* clear the link field here, we need it later */
411 set_irn_link(n, NULL);
413 /* we cannot optimize pinned nodes, so do not remember them */
414 if (is_nice_value(n))
415 value_add(info->nodes, n);
416 else if (is_Phi(n) && get_irn_mode(n) != mode_M) {
418 * Phis are "temporaries" and must be handled special:
419 * They are avail, but are not in Antic_in
421 value_add(info->avail_out, n);
427 * Compare two nodes for equal operands.
429 static int operands_equal(ir_node *n1, ir_node *n2)
436 arity = get_irn_arity(n1);
437 assert(n1->op == n2->op && arity == get_irn_arity(n2));
438 for (i = 0; i < arity; ++i)
439 if (! operands_equal(get_irn_n(n1, i), get_irn_n(n2, i)))
445 * Replace a value in a set by an node computing the same
446 * value in a dominator block.
448 * @return non-zero if a replacement took place
450 static int value_replace(pset *set, ir_node *e)
452 ir_node *old = value_add(set, e);
455 /* e must dominate old here */
456 assert(block_dominates(get_nodes_block(e), get_nodes_block(old)));
458 pset_remove(set, old, ir_node_hash(old));
466 * Perform insertion of partially redundant values.
467 * For every Block node, do the following:
468 * 1. Propagate the NEW_SETS of the dominator into the current block.
469 * If the block has multiple predecessors,
470 * 2a. Iterate over the ANTIC expressions for the block to see if
471 * any of them are partially redundant.
472 * 2b. If so, insert them into the necessary predecessors to make
473 * the expression fully redundant.
474 * 2c. Insert a new Phi merging the values of the predecessors.
475 * 2d. Insert the new Phi, and the new expressions, into the
478 static void insert_nodes(ir_node *block, void *ctx)
481 ir_node *e, *idom, *first_s;
482 block_info *curr_info, *idom_info;
483 int pos, arity = get_irn_intra_arity(block);
484 int all_same, by_some, updated;
486 curr_info = get_block_info(block);
487 curr_info->new_set = new_value_set();
489 if (block == env->start_block)
492 idom = get_Block_idom(block);
493 idom_info = get_block_info(idom);
495 /* update the new_sets */
497 dump_set(idom_info->new_set, "[New Set]", idom);
498 pset_foreach(e, idom_info->new_set) {
499 value_add(curr_info->new_set, e);
500 updated |= value_replace(curr_info->avail_out, e);
503 dump_set(curr_info->avail_out, "Updated [Avail_out]", block);
508 pset_foreach(e, curr_info->antic_in) {
510 * If we already have a leader for this node,
511 * it is totally redundant.
513 if (has_Phi_leader(e))
516 /* If the value was already computed in the dominator, then
517 it is totally redundant. Hence we have nothing to insert. */
518 if (lookup(idom_info->avail_out, e)) {
519 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
527 /* for all predecessor blocks */
528 for (pos = 0; pos < arity; ++pos) {
529 block_info *pred_info;
530 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
531 ir_node *e_prime, *v_prime, *e_dprime;
533 /* ignore bad blocks. */
534 if (is_Bad(pred_blk))
537 e_prime = phi_translate(e, block, pos, env);
540 pred_info = get_block_info(pred_blk);
541 e_dprime = find_leader(pred_info->avail_out, v_prime);
543 if (e_dprime == NULL) {
545 pred_info->avail = e_prime;
546 pred_info->not_found = 1;
550 pred_info->avail = e_dprime;
551 pred_info->not_found = 0;
555 else if (first_s != e_dprime)
558 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", e, block, e_dprime, pred_blk));
562 /* If it's not the same value already existing along every predecessor, and
563 it's defined by some predecessor, it is partially redundant. */
564 if (! all_same && by_some) {
566 ir_mode *mode = NULL;
567 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", e, block));
569 in = xmalloc(arity * sizeof(*in));
570 /* for all predecessor blocks */
571 for (pos = 0; pos < arity; ++pos) {
572 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
573 block_info *pred_info = get_block_info(pred_blk);
575 /* ignore bad blocks. */
576 if (is_Bad(pred_blk)) {
581 /* ignore blocks that already have the expression */
582 if (pred_info->not_found) {
583 ir_node *e_prime = pred_info->avail;
585 assert(! is_Phi(e_prime));
587 mode = get_irn_mode(e_prime);
589 get_irn_dbg_info(e_prime),
590 current_ir_graph, pred_blk,
593 get_irn_arity(e_prime),
594 get_irn_in(e_prime) + 1);
596 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
597 pred_info->avail = value_add(pred_info->avail_out, nn);
599 in[pos] = pred_info->avail;
601 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
603 value_add(curr_info->avail_out, phi);
604 value_add(curr_info->new_set, phi);
605 /* e might be translated, so add it here */
606 value_add(curr_info->avail_out, e);
607 value_add(curr_info->new_set, e);
608 DB((dbg, LEVEL_2, "New %+F for redundant %+F created\n", phi, e));
609 set_irn_link(e, phi);
617 * Do the elimination step: collect all changes
618 * We cannot do the changes right here, as this would change
619 * the hash values of the nodes in the avail_out set!
621 static void collect_elim_pairs(ir_node *block, void *ctx)
624 block_info *curr_info = get_block_info(block);
627 dump_set(curr_info->nodes, "Updating nodes", block);
628 pset_foreach(v, curr_info->nodes) {
629 ir_node *l = find_leader(curr_info->avail_out, v);
633 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
637 p->next = env->pairs;
644 * Do all the recorded changes.
646 static void eliminate_nodes(elim_pair *pairs)
650 for (p = pairs; p != NULL; p = p->next) {
651 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
652 exchange(p->old_node, p->new_node);
656 void do_gvn_pre(ir_graph *irg)
660 optimization_state_t state;
664 /* register a debug mask */
665 dbg = firm_dbg_register("firm.opt.gvn_pre");
666 firm_dbg_set_mask(dbg, SET_LEVEL_2);
670 a_env.trans_set = new_value_set();
672 a_env.start_block = get_irg_start_block(irg);
673 a_env.end_block = get_irg_end_block(irg);
676 /* Move Proj's into the same block as their args,
677 else we would assign the result to wrong blocks */
678 normalize_proj_nodes(irg);
680 /* critical edges MUST be removed */
681 remove_critical_cf_edges(irg);
683 /* we need dominator for Antic_out calculation */
684 if (get_irg_dom_state(irg) != dom_consistent)
686 /* we get all nodes of a block by following outs */
687 if (get_irg_outs_state(irg) != outs_consistent)
688 compute_irg_outs(irg);
691 * Switch on GCSE. We need it to correctly compute
692 * the leader of a node by hashing.
694 save_optimization_state(&state);
695 set_opt_global_cse(1);
697 /* allocate block info for all blocks */
698 irg_block_walk_graph(irg, NULL, alloc_blk_info, &a_env);
700 /* compute the available value sets for all blocks */
701 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
703 /* compute the anticipated value sets for all blocks */
705 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++iter));
707 irg_block_walk_graph(irg, compute_antic, NULL, &a_env);
708 DB((dbg, LEVEL_1, "------------------------\n"));
709 } while (a_env.changes != 0);
711 /* compute redundant expressions */
714 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++iter));
716 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
717 DB((dbg, LEVEL_1, "------------------------\n"));
718 } while (a_env.changes != 0);
720 /* last step: eliminate nodes */
721 dom_tree_walk_irg(irg, collect_elim_pairs, NULL, &a_env);
722 eliminate_nodes(a_env.pairs);
724 restore_optimization_state(&state);
726 /* clean up: delete all sets */
727 for (p = a_env.list; p != NULL; p = p->next) {
729 del_value_set(p->antic_in);
731 del_value_set(p->avail_out);
733 del_value_set(p->nodes);
735 del_value_set(a_env.trans_set);
736 obstack_free(&obst, NULL);
737 set_irg_pinned(irg, op_pin_state_pinned);
740 set_irg_outs_inconsistent(irg);
741 set_irg_loopinfo_inconsistent(irg);