3 * File name: ir/opt/gvn_pre.c
4 * Purpose: Global Value Numbering Partial Redundancy Elimination
5 * (VanDrunen Hosking 2004)
6 * Author: Michael Beck, Rubino Geiss
9 * Copyright: (c) 1998-2006 Universität Karlsruhe
10 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
19 #include "irgraph_t.h"
34 typedef struct block_info {
35 pset *nodes; /**< the set of nodes per block */
36 pset *avail_out; /**< the Avail_out set for a block */
37 pset *antic_in; /**< the Antic_in set for a block */
38 pset *new_set; /**< the set of all new values for a block */
39 ir_node *avail; /**< the get_map(avail, block) result */
40 int not_found; /**< non-zero, if avail was not found in this block */
41 struct block_info *next;
44 typedef struct avail_env {
45 struct obstack *obst; /**< the obstack to allocate on */
46 ir_node *start_block; /**< the start block of the current graph */
47 ir_node *end_block; /**< the end block of the current graph */
48 block_info *list; /**< links all block info entires for easier recovery */
49 int changes; /**< non-zero, if calculation of Antic_in has changed */
52 /** The debug module handle. */
53 static firm_dbg_module_t *dbg;
56 * returns non-zero if a node is movable.
58 static int is_nice_value(ir_node *n) {
59 ir_mode *mode = get_irn_mode(n);
61 if (!mode_is_data(mode))
63 if (is_irn_constlike(n))
65 return (get_irn_pinned(n) != op_pin_state_pinned);
68 #define pset_foreach(v, s) for ((v) = pset_first(s); (v); (v) = pset_next(s))
70 /** computes dst = dst \/ src */
71 static void pset_union(pset *dst, pset *src, unsigned (*hash)(void *))
75 pset_foreach(entry, src) {
76 pset_insert(dst, entry, ir_node_hash(entry));
82 * Dump the Avail or Antic sets
84 static void dump_set(pset *set, char *txt, ir_node *block)
89 DBG((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
91 pset_foreach(n, set) {
93 DBG((dbg, LEVEL_2, "\n"));
94 DBG((dbg, LEVEL_2, " %+F,", n));
97 DBG((dbg, LEVEL_2, "\n}\n"));
101 #define dump_set(set, txt, block)
102 #endif /* DEBUG_libfirm */
106 * Return the block info of a block
108 static block_info *get_block_info(ir_node *block) {
109 return get_irn_link(block);
113 * computes Avail_out(block):
115 * Avail_in(block) = Avail_out(dom(block))
116 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
119 * This function must be called in the top-down dominance order:
120 * Then, it computes Leader(Nodes(block)) instead of Nodes(block) !
122 static void compute_avail_top_down(ir_node *block, void *ctx)
124 avail_env *env = ctx;
125 block_info *dom_info;
126 block_info *info = get_block_info(block);
129 /* we don't need the end block Avail */
130 if (block == env->end_block)
133 pset_union(info->avail_out, info->nodes, ir_node_hash);
135 /* the root has no dominator */
136 if (block != env->start_block) {
137 dom_blk = get_Block_idom(block);
138 assert(is_Block(dom_blk));
140 dom_info = get_block_info(dom_blk);
143 pset_union(info->avail_out, dom_info->avail_out, ir_node_hash);
145 dump_set(info->avail_out, "Avail_out", block);
149 * Implement phi_translate
151 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, avail_env *env)
155 int i, arity = get_irn_intra_arity(node);
159 if (get_nodes_block(node) == block)
160 return get_Phi_pred(node, pos);
164 /* check if the node has at least one Phi predecessor */
165 for (i = 0; i < arity; ++i) {
166 ir_node *phi = get_irn_intra_n(node, i);
167 if (is_Phi(phi) && get_nodes_block(phi) == block)
171 /* no Phi in the predecessors */
175 pred_block = get_Block_cfgpred_block(block, pos);
177 /* Create a copy of the node in the pos'th predecessor block.
178 Use our environmental obstack, as these nodes are always
180 old = current_ir_graph->obst;
181 current_ir_graph->obst = env->obst;
183 get_irn_dbg_info(node),
190 /* We need the attribute copy here, because the Hash value of a
191 node might depend on that. */
192 copy_node_attr(node, res);
193 current_ir_graph->obst = old;
195 for (i = -1; i < arity; ++i) {
196 ir_node *pred = get_irn_intra_n(node, i);
199 set_irn_n(res, i, pred);
201 set_irn_n(res, i, get_Phi_pred(pred, pos));
203 set_irn_link(res, NULL);
208 * computes Antic_in(block):
211 static void compute_antic(ir_node *block, void *ctx)
213 avail_env *env = ctx;
214 block_info *succ_info;
215 block_info *info = get_block_info(block);
219 /* no need for computations in start block */
220 if (block == env->start_block)
223 size = pset_count(info->antic_in);
225 /* the root has no dominator */
226 if (block != env->end_block) {
227 int n_succ = get_Block_n_cfg_outs(block);
232 pset *nodes = new_pset(identities_cmp, 8);
234 pset_union(nodes, info->nodes, ir_node_hash);
236 /* find blocks position in succ's block predecessors */
237 succ = get_Block_cfg_out(block, 0);
238 for (i = get_Block_n_cfgpreds(succ) - 1; i >= 0; --i) {
239 if (get_Block_cfgpred_block(succ, i) == block) {
246 succ_info = get_block_info(succ);
247 for (node = pset_first(succ_info->antic_in);
249 node = pset_next(succ_info->antic_in)) {
250 ir_node *trans = phi_translate(node, succ, pos, env);
252 identify_remember(nodes, trans);
254 /* add all predecessors of node */
255 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
256 ir_node *pred = get_irn_n(node, i);
257 ir_node *trans = phi_translate(pred, succ, pos, env);
259 if (is_nice_value(trans))
260 identify_remember(nodes, trans);
263 /* this step calculates Antic_in(b) = Antic_out(b) \/ Nodes(b) */
264 pset_union(info->antic_in, nodes, ir_node_hash);
269 block_info *succ0_info;
274 /* Select a successor to compute the disjoint of all Nodes
275 sets, it might be useful to select the block with the
276 smallest number of nodes. For simplicity we choose the
278 succ0 = get_Block_cfg_out(block, 0);
279 succ0_info = get_block_info(succ0);
280 for (n = pset_first(succ0_info->antic_in);
282 n = pset_next(succ0_info->antic_in)) {
283 /* we need the disjoint */
284 for (i = 1; i < n_succ; ++i) {
285 ir_node *succ = get_Block_cfg_out(block, i);
286 block_info *succ_info = get_block_info(succ);
287 if (pset_find(succ_info->antic_in, n, ir_node_hash(n)) == NULL)
291 /* we found a node that is common in all Antic_in(succ(b)),
292 put it in Antic_in(b) */
293 identify_remember(info->antic_in, n);
296 /* this step calculates Antic_in(b) = Antic_out(b) \/ Nodes(b) */
297 pset_union(info->antic_in, info->nodes, ir_node_hash);
301 if (size != pset_count(info->antic_in))
302 /* the Antic_in set has changed */
305 dump_set(info->antic_in, "Antic_in", block);
306 } /* compute_antic */
309 * allocate a block info
311 static void alloc_blk_info(ir_node *block, void *ctx)
314 avail_env *env = ctx;
315 block_info *info = obstack_alloc(env->obst, sizeof(block_info));
317 set_irn_link(block, info);
318 info->nodes = new_pset(identities_cmp, 8);
319 info->antic_in = new_pset(identities_cmp, 8);
320 info->avail_out = new_pset(identities_cmp, 8);
323 info->new_set = NULL;
324 info->next = env->list;
325 env->list = info->next;
327 /* fill the nodes set, we will need it later */
328 for (i = get_irn_n_outs(block) - 1; i >= 0; --i) {
329 ir_node *n = get_irn_out(block, i);
331 /* clear the link field here, we need it later */
332 set_irn_link(n, NULL);
334 /* we cannot optimize pinned nodes, so do not remember them */
335 if (is_nice_value(n))
336 identify_remember(info->nodes, n);
337 else if (is_Phi(n) && get_irn_mode(n) != mode_M) {
339 * Phis are "temporaries" and must be handled special:
340 * They are avail, but are not in Antic_in
342 identify_remember(info->avail_out, n);
348 * Compare two nodes for equal operands.
350 static int operands_equal(ir_node *n1, ir_node *n2)
352 int i, arity = get_irn_arity(n1);
353 assert(n1->op == n2->op && arity == get_irn_arity(n2));
354 for (i = 0; i < arity; ++i)
355 if (get_irn_n(n1, i) != get_irn_n(n2, i))
361 * Get the leader of an expression. In Firm, only
362 * Phi nodes can be leaders, all other 'leader' are
363 * handled by the identify_remember mechanism right.
365 static ir_node *get_leader(ir_node *n)
367 ir_node *l = get_irn_link(n);
376 static ir_node *has_leader(ir_node *n)
378 ir_node *l = get_irn_link(n);
388 * Perform insertion of partially redundant values.
389 * For every Block node, do the following:
390 * 1. Propagate the NEW_SETS of the dominator into the current block.
391 * If the block has multiple predecessors,
392 * 2a. Iterate over the ANTIC expressions for the block to see if
393 * any of them are partially redundant.
394 * 2b. If so, insert them into the necessary predecessors to make
395 * the expression fully redundant.
396 * 2c. Insert a new Phi merging the values of the predecessors.
397 * 2d. Insert the new Phi, and the new expressions, into the
400 static void insert_nodes(ir_node *block, void *ctx)
402 avail_env *env = ctx;
403 ir_node *v, *idom, *first_s;
404 block_info *curr_info, *idom_info;
405 int pos, arity = get_irn_intra_arity(block);
406 int all_same, by_some;
408 curr_info = get_block_info(block);
409 curr_info->new_set = new_pset(identities_cmp, 8);
411 if (block == env->start_block)
414 idom = get_Block_idom(block);
415 idom_info = get_block_info(idom);
417 /* update the new_sets */
418 pset_union(curr_info->new_set, idom_info->new_set, ir_node_hash);
419 pset_foreach(v, idom_info->new_set) {
420 ir_node *old = identify_remember(idom_info->new_set, v);
423 /* v must dominate old here */
424 assert(block_dominates(get_nodes_block(v), get_nodes_block(old)));
426 pset_remove(curr_info->avail_out, old, ir_node_hash(old));
427 identify_remember(curr_info->avail_out, v);
430 dump_set(curr_info->avail_out, "[Avail_out]", block);
435 pset_foreach(v, curr_info->antic_in) {
437 * If we already have a leader for this node,
438 * it is totally redundant.
443 /* If the value was already computed in the dominator, then
444 it is totally redundant. Hence we have nothing to insert. */
445 if (pset_find(idom_info->avail_out, v, ir_node_hash(v))) {
446 // DBG((dbg, LEVEL_2, "Found %+F from block %+F avail in dom %+F\n", v, block, idom));
454 /* for all predecessor blocks */
455 for (pos = 0; pos < arity; ++pos) {
456 block_info *pred_info;
457 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
458 ir_node *trans, *found;
460 /* ignore bad blocks. */
461 if (is_Bad(pred_blk))
464 trans = phi_translate(v, block, pos, env);
466 pred_info = get_block_info(pred_blk);
467 found = pset_find(pred_info->avail_out, trans, ir_node_hash(trans));
471 pred_info->avail = trans;
472 pred_info->not_found = 1;
475 found = get_leader(found);
476 pred_info->avail = found;
477 pred_info->not_found = 0;
481 else if (first_s != found)
484 DBG((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", v, block, found, pred_blk));
488 /* If it's not the same value already existing along every predecessor, and
489 it's defined by some predecessor, it is partially redundant. */
490 if (! all_same && by_some) {
492 ir_mode *mode = NULL;
493 DBG((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", v, block));
495 in = xmalloc(arity * sizeof(*in));
496 /* for all predecessor blocks */
497 for (pos = 0; pos < arity; ++pos) {
498 ir_node *pred_blk = get_Block_cfgpred_block(block, pos);
499 block_info *pred_info = get_block_info(pred_blk);
501 /* ignore bad blocks. */
502 if (is_Bad(pred_blk)) {
507 /* ignore blocks that already have the expression */
508 if (pred_info->not_found) {
509 ir_node *avail = pred_info->avail;
511 assert(! is_Phi(avail));
513 mode = get_irn_mode(avail);
515 get_irn_dbg_info(avail),
516 current_ir_graph, pred_blk,
519 get_irn_arity(avail),
520 get_irn_in(avail) + 1);
522 pred_info->avail = identify_remember(pred_info->avail_out, nn);
524 in[pos] = pred_info->avail;
526 phi = new_r_Phi(current_ir_graph, block, arity, in, mode);
528 identify_remember(curr_info->avail_out, phi);
529 identify_remember(curr_info->new_set, phi);
530 /* v might be translated, so add it here */
531 identify_remember(curr_info->avail_out, v);
532 identify_remember(curr_info->new_set, v);
533 set_irn_link(v, phi);
534 DBG((dbg, LEVEL_2, "New %+F for redundant %+F created\n", phi, v));
541 * Do the elimination step
543 static void eliminate_nodes(ir_node *block, void *ctx)
545 avail_env *env = ctx;
546 block_info *curr_info = get_block_info(block);
549 pset_foreach(v, curr_info->nodes) {
550 ir_node *l = identify_remember(curr_info->avail_out, v);
554 DBG((dbg, LEVEL_2, "Replacing %+F by %+F\n", v, l));
560 void do_gvn_pre(ir_graph *irg)
564 optimization_state_t state;
568 /* register a debug mask */
569 dbg = firm_dbg_register("firm.opt.gvn_pre");
574 a_env.start_block = get_irg_start_block(irg);
575 a_env.end_block = get_irg_end_block(irg);
577 /* Move Proj's into the same block as their args,
578 else we would assign the result to wrong blocks */
579 normalize_proj_nodes(irg);
581 /* critical edges MUST be removed */
582 remove_critical_cf_edges(irg);
584 /* we need dominator AND post dominator information */
585 if (get_irg_dom_state(irg) != dom_consistent)
587 if (get_irg_postdom_state(irg) != dom_consistent)
588 compute_postdoms(irg);
589 if (get_irg_outs_state(irg) != outs_consistent)
590 compute_irg_outs(irg);
593 * Switch on GCSE. We need it to correctly compute
594 * the leader of a node by hashing.
596 save_optimization_state(&state);
597 set_opt_global_cse(1);
599 /* allocate block info for all blocks */
600 irg_block_walk_graph(irg, NULL, alloc_blk_info, &a_env);
602 /* compute the available value sets for all blocks */
603 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
605 /* compute the anticipated value sets for all blocks */
607 DBG((dbg, LEVEL_1, "Antic_in Iteration %d starts ...\n", ++iter));
609 irg_block_walk_graph(irg, compute_antic, NULL, &a_env);
610 // postdom_tree_walk_irg(irg, compute_antic, NULL, &a_env);
611 DBG((dbg, LEVEL_1, "------------------------\n"));
612 } while (a_env.changes != 0);
614 /* compute redundant expressions */
617 DBG((dbg, LEVEL_1, "Insert Iteration %d starts ...\n", ++iter));
619 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
620 DBG((dbg, LEVEL_1, "------------------------\n"));
621 } while (a_env.changes != 0);
623 /* last step: eliminate nodes */
624 dom_tree_walk_irg(irg, eliminate_nodes, NULL, &a_env);
626 restore_optimization_state(&state);
628 /* clean up: delete all sets */
629 for (p = a_env.list; p != NULL; p = p->next) {
631 del_pset(p->antic_in);
633 del_pset(p->avail_out);
637 obstack_free(&obst, NULL);