3 * File name: ir/opt/gvn_pre.c
4 * Purpose: Global Value Numbering Partial Redundancy Elimination
5 * (VanDrunen Hosking 2004)
6 * Author: Michael Beck, Rubino Geiss
9 * Copyright: (c) 1998-2006 Universität Karlsruhe
10 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
19 #include "irgraph_t.h"
34 typedef struct block_info {
35 pset *nodes; /**< the set of nodes per block */
36 pset *avail_out; /**< the Avail_out set for a block */
37 pset *antic_in; /**< the Antic_in set for a block */
38 pset *new_set; /**< the set of all new values for a block */
39 ir_node *avail; /**< the get_map(avail, block) result */
40 int not_found; /**< non-zero, if avail was not found in this block */
41 struct block_info *next;
44 typedef struct elim_pair {
47 struct elim_pair *next;
50 typedef struct pre_env {
51 struct obstack *obst; /**< the obstack to allocate on */
52 pset *trans_set; /**< the set of all translated values */
53 ir_node *start_block; /**< the start block of the current graph */
54 ir_node *end_block; /**< the end block of the current graph */
55 block_info *list; /**< links all block info entires for easier recovery */
56 elim_pair *pairs; /**< a list of node pairs that must be eliminated */
57 int changes; /**< non-zero, if calculation of Antic_in has changed */
60 /** The debug module handle. */
61 static firm_dbg_module_t *dbg;
64 * returns non-zero if a node is movable.
66 static int is_nice_value(ir_node *n) {
67 ir_mode *mode = get_irn_mode(n);
69 if (!mode_is_data(mode))
71 if (is_irn_constlike(n))
74 return is_nice_value(get_Proj_pred(n));
75 return (get_irn_pinned(n) != op_pin_state_pinned);
78 #define pset_foreach(v, s) for ((v) = pset_first(s); (v); (v) = pset_next(s))
80 typedef unsigned (*HASH_FUNC)(void *);
84 * Dump the Avail or Antic sets
86 static void dump_set(pset *set, char *txt, ir_node *block)
91 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
93 pset_foreach(n, set) {
95 DB((dbg, LEVEL_2, "\n"));
96 DB((dbg, LEVEL_2, " %+F,", n));
99 DB((dbg, LEVEL_2, "\n}\n"));
103 #define dump_set(set, txt, block)
104 #endif /* DEBUG_libfirm */
108 * Return the block info of a block
110 static block_info *get_block_info(ir_node *block) {
111 return get_irn_link(block);
114 #define new_value_set() new_pset(identities_cmp, 8)
115 #define del_value_set(set) del_pset(set)
118 * Add a value to a value set
120 static ir_node *value_add(pset *value_set, ir_node *n)
122 return identify_remember(value_set, n);
126 * Lookup a value in a value set
128 static ir_node *lookup(pset *value_set, ir_node *n)
130 return pset_find(value_set, n, ir_node_hash(n));
133 /** computes dst = dst \/ src for value sets */
134 static void value_union(pset *dst, pset *src)
137 pset_foreach(entry, src)
138 value_add(dst, entry);
143 * computes Avail_out(block):
145 * Avail_in(block) = Avail_out(dom(block))
146 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
149 * This function must be called in the top-down dominance order:
150 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
152 static void compute_avail_top_down(ir_node *block, void *ctx)
155 block_info *dom_info;
156 block_info *info = get_block_info(block);
159 /* we don't need the end block Avail */
160 if (block == env->end_block)
164 * First add all nodes from the dominator.
165 * This must be done to ensure that Antic_out contains the leader
166 * for every node. The root has no dominator.
168 if (block != env->start_block) {
169 dom_blk = get_Block_idom(block);
170 assert(is_Block(dom_blk));
172 dom_info = get_block_info(dom_blk);
175 value_union(info->avail_out, dom_info->avail_out);
177 value_union(info->avail_out, info->nodes);
179 dump_set(info->avail_out, "Avail_out", block);
183 * Returns the Phi-leader if one exists, else NULL.
185 static ir_node *has_Phi_leader(ir_node *n)
187 ir_node *l = get_irn_link(n);
194 } /* has_Phi_leader */
197 * Get the leader of an expression.
199 static ir_node *find_leader(pset *value_set, ir_node *n)
201 ir_node *l = has_Phi_leader(n);
204 return lookup(value_set, n);
208 * Implements phi_translate
210 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, pre_env *env)
215 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
216 block_info *pred_info = get_block_info(pred_block);
219 if (get_nodes_block(node) == block) {
220 ir_node *leader, *pred;
221 pred = get_Phi_pred(node, pos);
222 leader = find_leader(pred_info->avail_out, pred);
223 assert(leader || ! is_nice_value(pred));
224 node = leader != NULL ? leader : pred;
229 arity = get_irn_intra_arity(node);
231 /* check if the node has at least one Phi predecessor */
232 for (i = 0; i < arity; ++i) {
233 ir_node *pred = get_irn_intra_n(node, i);
234 ir_node *local_bl = get_irn_intra_n(pred, -1);
235 ir_node *leader = find_leader(get_block_info(local_bl)->avail_out, pred);
237 assert(leader || ! is_nice_value(pred));
238 leader = leader != NULL ? leader : pred;
239 if (is_Phi(leader) && get_nodes_block(leader) == block)
243 /* no Phi in the predecessors */
247 /* Create a copy of the node in the pos'th predecessor block.
248 Use our environmental obstack, as these nodes are always
250 old = current_ir_graph->obst;
251 current_ir_graph->obst = env->obst;
253 get_irn_dbg_info(node),
260 /* We need the attribute copy here, because the Hash value of a
261 node might depend on that. */
262 copy_node_attr(node, nn);
264 set_irn_n(nn, -1, get_irn_intra_n(node, -1));
265 for (i = 0; i < arity; ++i) {
266 ir_node *pred = get_irn_intra_n(node, i);
267 ir_node *local_bl = get_irn_intra_n(pred, -1);
268 ir_node *leader = find_leader(get_block_info(local_bl)->avail_out, pred);
270 leader = leader != NULL ? leader : pred;
271 if (is_Phi(leader) && get_nodes_block(leader) == block)
272 set_irn_n(nn, i, get_Phi_pred(leader, pos));
274 set_irn_n(nn, i, leader);
276 set_irn_link(nn, NULL);
278 res = value_add(env->trans_set, nn);
279 current_ir_graph->obst = old;
282 obstack_free(env->obst, nn);
284 DB((dbg, LEVEL_2, "Translate %+F into %+F\n", node, res));
287 } /* phi_translate */
290 * computes Antic_in(block):
292 static void compute_antic(ir_node *block, void *ctx)
295 block_info *succ_info;
296 block_info *info = get_block_info(block);
300 /* no need for computations in start block */
301 if (block == env->start_block)
304 size = pset_count(info->antic_in);
306 /* the root has no dominator */
307 if (block != env->end_block) {
308 int n_succ = get_Block_n_cfg_outs(block);
313 pset *nodes = new_value_set();
315 value_union(nodes, info->nodes);
317 /* find blocks position in succ's block predecessors */
318 succ = get_Block_cfg_out(block, 0);
319 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
320 if (get_Block_cfgpred_block(succ, i) == block) {
327 succ_info = get_block_info(succ);
328 for (node = pset_first(succ_info->antic_in);
330 node = pset_next(succ_info->antic_in)) {
331 ir_node *trans = phi_translate(node, succ, pos, env);
333 value_add(nodes, trans);
335 /* add all predecessors of node */
336 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
337 ir_node *pred = get_irn_n(node, i);
338 ir_node *trans = phi_translate(pred, succ, pos, env);
340 if (is_nice_value(trans))
341 value_add(nodes, trans);
344 /* this step calculates Antic_in(b) = Antic_out(b) \/ Nodes(b) */
345 value_union(info->antic_in, nodes);
346 del_value_set(nodes);
350 block_info *succ0_info;
355 /* Select a successor to compute the disjoint of all Nodes
356 sets, it might be useful to select the block with the
357 smallest number of nodes. For simplicity we choose the
359 succ0 = get_Block_cfg_out(block, 0);
360 succ0_info = get_block_info(succ0);
361 for (n = pset_first(succ0_info->antic_in);
363 n = pset_next(succ0_info->antic_in)) {
364 /* we need the disjoint */
365 for (i = 1; i < n_succ; ++i) {
366 ir_node *succ = get_Block_cfg_out(block, i);
367 block_info *succ_info = get_block_info(succ);
368 if (lookup(succ_info->antic_in, n) == NULL)
372 /* we found a node that is common in all Antic_in(succ(b)),
373 put it in Antic_in(b) */
374 value_add(info->antic_in, n);
377 /* this step calculates Antic_in(b) = Antic_out(b) \/ Nodes(b) */
378 value_union(info->antic_in, info->nodes);
382 if (size != pset_count(info->antic_in)) {
383 /* the Antic_in set has changed */
385 dump_set(info->antic_in, "Antic_in", block);
387 } /* compute_antic */
390 * allocate a block info
392 static void alloc_blk_info(ir_node *block, void *ctx)
396 block_info *info = obstack_alloc(env->obst, sizeof(block_info));
398 set_irn_link(block, info);
399 info->nodes = new_value_set();
400 info->antic_in = new_value_set();
401 info->avail_out = new_value_set();
404 info->new_set = NULL;
405 info->next = env->list;
406 env->list = info->next;
408 /* fill the nodes set, we will need it later */
409 for (i = get_irn_n_outs(block) - 1; i >= 0; --i) {
410 ir_node *n = get_irn_out(block, i);
412 /* clear the link field here, we need it later */
413 set_irn_link(n, NULL);
415 /* we cannot optimize pinned nodes, so do not remember them */
416 if (is_nice_value(n))
417 value_add(info->nodes, n);
418 else if (is_Phi(n) && get_irn_mode(n) != mode_M) {
420 * Phis are "temporaries" and must be handled special:
421 * They are avail, but are not in Antic_in
423 value_add(info->avail_out, n);
429 * Compare two nodes for equal operands.
431 static int operands_equal(ir_node *n1, ir_node *n2)
438 arity = get_irn_arity(n1);
439 assert(n1->op == n2->op && arity == get_irn_arity(n2));
440 for (i = 0; i < arity; ++i)
441 if (! operands_equal(get_irn_n(n1, i), get_irn_n(n2, i)))
447 * Replace a value in a set by an node computing the same
448 * value in a dominator block.
450 * @return non-zero if a replacement took place
452 static int value_replace(pset *set, ir_node *e)
454 ir_node *old = value_add(set, e);
457 /* e must dominate old here */
458 assert(block_dominates(get_nodes_block(e), get_nodes_block(old)));
460 pset_remove(set, old, ir_node_hash(old));
468 * Perform insertion of partially redundant values.
469 * For every Block node, do the following:
470 * 1. Propagate the NEW_SETS of the dominator into the current block.
471 * If the block has multiple predecessors,
472 * 2a. Iterate over the ANTIC expressions for the block to see if
473 * any of them are partially redundant.
474 * 2b. If so, insert them into the necessary predecessors to make
475 * the expression fully redundant.
476 * 2c. Insert a new Phi merging the values of the predecessors.
477 * 2d. Insert the new Phi, and the new expressions, into the
480 static void insert_nodes(ir_node *block, void *ctx)
483 ir_node *e, *idom, *first_s;
484 block_info *curr_info, *idom_info;
485 int pos, arity = get_irn_intra_arity(block);
486 int all_same, by_some, updated;
488 curr_info = get_block_info(block);
489 curr_info->new_set = new_value_set();
491 if (block == env->start_block)
494 idom = get_Block_idom(block);
495 idom_info = get_block_info(idom);
497 /* update the new_sets */
499 dump_set(idom_info->new_set, "[New Set]", idom);
500 pset_foreach(e, idom_info->new_set) {
501 value_add(curr_info->new_set, e);
502 updated |= value_replace(curr_info->avail_out, e);
505 dump_set(curr_info->avail_out, "Updated [Avail_out]", block);
510 pset_foreach(e, curr_info->antic_in) {
512 * If we already have a leader for this node,
513 * it is totally redundant.
515 if (has_Phi_leader(e))
518 /* If the value was already computed in the dominator, then
519 it is totally redundant. Hence we have nothing to insert. */
520 if (lookup(idom_info->avail_out, e)) {
521 // DB((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
529 /* for all predecessor blocks */
530 for (pos = 0; pos < arity; ++pos) {
531 block_info *pred_info;
532 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
533 ir_node *e_prime, *v_prime, *e_dprime;
535 /* ignore bad blocks. */
536 if (is_Bad(pred_blk))
539 e_prime = phi_translate(e, block, pos, env);
542 pred_info = get_block_info(pred_blk);
543 e_dprime = find_leader(pred_info->avail_out, v_prime);
545 if (e_dprime == NULL) {
547 pred_info->avail = e_prime;
548 pred_info->not_found = 1;
552 pred_info->avail = e_dprime;
553 pred_info->not_found = 0;
557 else if (first_s != e_dprime)
560 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", e, block, e_dprime, pred_blk));
564 /* If it's not the same value already existing along every predecessor, and
565 it's defined by some predecessor, it is partially redundant. */
566 if (! all_same && by_some) {
568 ir_mode *mode = NULL;
569 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", e, block));
571 in = xmalloc(arity * sizeof(*in));
572 /* for all predecessor blocks */
573 for (pos = 0; pos < arity; ++pos) {
574 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
575 block_info *pred_info = get_block_info(pred_blk);
577 /* ignore bad blocks. */
578 if (is_Bad(pred_blk)) {
583 /* ignore blocks that already have the expression */
584 if (pred_info->not_found) {
585 ir_node *e_prime = pred_info->avail;
587 assert(! is_Phi(e_prime));
589 mode = get_irn_mode(e_prime);
591 get_irn_dbg_info(e_prime),
592 current_ir_graph, pred_blk,
595 get_irn_arity(e_prime),
596 get_irn_in(e_prime) + 1);
598 DB((dbg, LEVEL_2, "New node %+F in block %+F created\n", nn, pred_blk));
599 pred_info->avail = value_add(pred_info->avail_out, nn);
601 in[pos] = pred_info->avail;
603 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
605 value_add(curr_info->avail_out, phi);
606 value_add(curr_info->new_set, phi);
607 /* e might be translated, so add it here */
608 value_add(curr_info->avail_out, e);
609 value_add(curr_info->new_set, e);
610 DB((dbg, LEVEL_2, "New %+F for redundant %+F created\n", phi, e));
611 set_irn_link(e, phi);
619 * Do the elimination step: collect all changes
620 * We cannot do the changes right here, as this would change
621 * the hash values of the nodes in the avail_out set!
623 static void collect_elim_pairs(ir_node *block, void *ctx)
626 block_info *curr_info = get_block_info(block);
629 dump_set(curr_info->nodes, "Updating nodes", block);
630 pset_foreach(v, curr_info->nodes) {
631 ir_node *l = find_leader(curr_info->avail_out, v);
635 elim_pair *p = obstack_alloc(env->obst, sizeof(*p));
639 p->next = env->pairs;
646 * Do all the recorded changes.
648 static void eliminate_nodes(elim_pair *pairs)
652 for (p = pairs; p != NULL; p = p->next) {
653 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
654 exchange(p->old_node, p->new_node);
658 void do_gvn_pre(ir_graph *irg)
662 optimization_state_t state;
666 /* register a debug mask */
667 dbg = firm_dbg_register("firm.opt.gvn_pre");
668 firm_dbg_set_mask(dbg, SET_LEVEL_2);
672 a_env.trans_set = new_value_set();
674 a_env.start_block = get_irg_start_block(irg);
675 a_env.end_block = get_irg_end_block(irg);
678 /* Move Proj's into the same block as their args,
679 else we would assign the result to wrong blocks */
680 normalize_proj_nodes(irg);
682 /* critical edges MUST be removed */
683 remove_critical_cf_edges(irg);
685 /* we need dominator for Antic_out calculation */
686 if (get_irg_dom_state(irg) != dom_consistent)
688 /* we get all nodes of a block by following outs */
689 if (get_irg_outs_state(irg) != outs_consistent)
690 compute_irg_outs(irg);
693 * Switch on GCSE. We need it to correctly compute
694 * the leader of a node by hashing.
696 save_optimization_state(&state);
697 set_opt_global_cse(1);
699 DB((dbg, LEVEL_1, "Doing GVN-PRE for %e\n", get_irg_entity(irg)));
701 /* allocate block info for all blocks */
702 irg_block_walk_graph(irg, NULL, alloc_blk_info, &a_env);
704 /* compute the available value sets for all blocks */
705 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
707 /* compute the anticipated value sets for all blocks */
709 DB((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++iter));
711 irg_block_walk_graph(irg, compute_antic, NULL, &a_env);
712 DB((dbg, LEVEL_1, "------------------------\n"));
713 } while (a_env.changes != 0);
715 /* compute redundant expressions */
718 DB((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++iter));
720 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
721 DB((dbg, LEVEL_1, "------------------------\n"));
722 } while (a_env.changes != 0);
724 /* last step: eliminate nodes */
725 dom_tree_walk_irg(irg, collect_elim_pairs, NULL, &a_env);
726 eliminate_nodes(a_env.pairs);
728 restore_optimization_state(&state);
730 /* clean up: delete all sets */
731 for (p = a_env.list; p != NULL; p = p->next) {
733 del_value_set(p->antic_in);
735 del_value_set(p->avail_out);
737 del_value_set(p->nodes);
739 del_value_set(a_env.trans_set);
740 obstack_free(&obst, NULL);
741 set_irg_pinned(irg, op_pin_state_pinned);
744 set_irg_outs_inconsistent(irg);
745 set_irg_loopinfo_inconsistent(irg);