2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
45 #include "irgraph_t.h"
49 /** Additional info we need for every block. */
50 typedef struct block_info {
51 ir_valueset_t *exp_gen; /**< contains this blocks clean expressions */
52 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
53 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
54 ir_valueset_t *new_set; /**< The set of all new values for a block. */
55 ir_nodehashmap_t *trans; /**< contains translated nodes in block */
56 ir_node *avail; /**< The get_map(avail, block) result. */
57 ir_node *block; /**< The Block of the block info. */
58 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
59 int found; /**< Non-zero, if avail was found in this block. */
63 * A pair of nodes that must be exchanged.
64 * We must defer the exchange because our hash-sets cannot
65 * find an already replace node else.
67 typedef struct elim_pair {
68 ir_node *old_node; /**< The old node that will be replaced. */
69 ir_node *new_node; /**< The new node. */
70 struct elim_pair *next; /**< Links all entries in a list. */
71 int reason; /**< The reason for the replacement. */
74 /** The environment for the GVN-PRE algorithm */
75 typedef struct pre_env {
76 struct obstack *obst; /**< The obstack to allocate on. */
77 ir_node *start_block; /**< The start block of the current graph. */
78 ir_node *end_block; /**< The end block of the current graph */
79 block_info *list; /**< Links all block info entries for easier recovery. */
80 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
81 unsigned last_idx; /**< last node index of "old" nodes, all higher indexes are newly created once. */
82 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
83 char first_iter; /**< non-zero for first iteration */
86 /** The debug module handle. */
87 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
89 /* ---------- Functions for Value sets ---------- */
93 * computes dst = dst \/ src for value sets
95 * @param dst the union result set
96 * @param src the source set
98 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
100 ir_valueset_iterator_t iter;
104 foreach_valueset(src, value, expr, iter) {
105 /* dominator tree walk; use first available expr as leader */
106 ir_valueset_insert(dst, value, expr);
111 /* ---------- Functions for Values ---------- */
114 * Remember adds a node e to the GCSE valuetable.
116 * @param e a node representing an expression
117 * @return the final value for the expression e
119 static ir_node *remember(ir_node *e)
124 ir_node *pred = get_Proj_pred(e);
125 ir_node *v_pred = identify_remember(pred);
127 if (v_pred != pred) {
128 ir_node *proj = new_r_Proj(v_pred, get_irn_mode(e), get_Proj_proj(e));
129 value = identify_remember(proj);
134 value = identify_remember(e);
139 * Identify does a lookup in the GCSE valuetable.
141 * @param e a node representing an expression
142 * @return a node representing the value or NULL if no identified
144 static ir_node *identify(ir_node *e)
146 return identify_remember(e);
150 * Returns the block info of a block.
152 * @param block the block
153 * @return block info of block
155 static block_info *get_block_info(ir_node *block)
157 return (block_info*)get_irn_link(block);
161 * Allocate block info for block block.
163 * @param block the block
164 * @param env the environment
166 static void alloc_block_info(ir_node *block, pre_env *env)
168 block_info *info = OALLOC(env->obst, block_info);
170 set_irn_link(block, info);
171 info->exp_gen = ir_valueset_new(16);
172 info->avail_out = ir_valueset_new(16);
173 info->antic_in = ir_valueset_new(16);
174 /* valueset has much nicer interface */
175 info->trans = XMALLOC(ir_nodehashmap_t);
176 ir_nodehashmap_init(info->trans);
178 info->new_set = NULL;
183 info->next = env->list;
185 } /* alloc_block_info */
188 * Returns non-zero if a node is movable and a possible candidate for PRE.
191 * @return non-zero if value is nice
193 static int is_nice_value(ir_node *n)
195 ir_mode *mode = get_irn_mode(n);
204 n = get_Proj_pred(n);
206 /* we may not move pinned nodes */
207 if (get_irn_pinned(n) == op_pin_state_pinned)
210 if (!mode_is_data(mode)) {
211 /* Div and Mod are only nice if they do not use memory. */
212 if (! is_Div(n) && ! is_Mod(n))
214 if (! is_NoMem(get_memop_mem(n)))
218 } /* is_nice_value */
225 * @param set the set to dump
226 * @param txt a text to describe the set
227 * @param block the owner block of the set
229 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
231 ir_valueset_iterator_t iter;
236 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
237 foreach_valueset(set, value, expr, iter) {
239 DB((dbg, LEVEL_2, "\n"));
241 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
243 DB((dbg, LEVEL_2, " %+F,", expr));
246 DB((dbg, LEVEL_2, "\n}\n"));
247 } /* dump_value_set */
250 * Dump all exp_gen value sets.
252 * @param list the list of block infos to retrieve the sets from
254 static void dump_all_expgen_sets(block_info *list)
258 for (bl_info = list; bl_info != NULL; bl_info = bl_info->next) {
259 dump_value_set(bl_info->exp_gen, "[Exp_gen]", bl_info->block);
264 #define dump_value_set(set, txt, block)
265 #define dump_all_expgen_sets(list)
266 #endif /* DEBUG_libfirm */
269 * Gets result of nodes phi translation into block.
271 * @param node the node
272 * @param block the target block
274 * @return a phi translation of node node into block block or NULL
276 static ir_node *get_translated(ir_node *node, ir_node *block)
281 if (is_irn_constlike(node))
284 bi = get_block_info(block);
285 trans = ir_nodehashmap_get(ir_node, bi->trans, node);
290 * Saves result of phi translation of node into predecessor
291 * at pos of block succ.
293 * @param node the node
294 * @param succ the successor of the translation target block
295 * @param pos the position of the predecessor block
296 * @param trans the translation result
299 static void set_translated(ir_node *node, ir_node *succ, int pos, ir_node *trans)
301 ir_node *pred = get_Block_cfgpred_block(succ, pos);
302 block_info *bi = get_block_info(pred);
304 ir_nodehashmap_insert(bi->trans, node, trans);
308 * Checks if a node node is clean in block block for use in antic_in.
310 * A clean node in block block can be hoisted above block block.
311 * A node is not clean if its value is killed in block block.
312 * The node can still be hoisted into block block.
314 * @param n the phi translated or not translated node
315 * @param block the block
316 * @return non-zero value for clean node
318 static int is_clean_in_block_antic(ir_node *node, ir_node *block)
322 if (get_irn_mode(node) == mode_M)
325 /* a phi only has predecessors in other blocks */
329 /* constants are in start block */
330 if (is_irn_constlike(node))
333 /* what we really want to check
334 Only for node is translated case; other are clean anyway */
335 if (! is_nice_value(node)) {
339 /* cleanliness depends on nodes predecessors
340 At least if node is translated. */
341 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
342 ir_node *pred = get_irn_n(node, i);
346 if (is_irn_constlike(pred))
349 /* exp_gen only contains clean nodes */
350 if (ir_valueset_lookup(get_block_info(block)->exp_gen, pred))
353 /* block of pred strictly dominates target block. pred irrelevant. */
354 if (block_strictly_dominates(get_nodes_block(pred), block))
357 /* --- pred neither in block, nor dominating -- */
359 /* This pred is in antic_in and such clean.
360 Not every clean pred is in antic_in though.
361 Predecessor might be translated or not */
362 value = identify(pred);
363 if (ir_valueset_lookup(get_block_info(block)->antic_in, value))
366 /* This check is not redundant for translated nodes;
367 non translated ones are already nice. */
368 if (! is_nice_value(pred)) {
369 DB((dbg, LEVEL_5, "unclean %+F because pred %+F not nice\n", node, pred));
373 /* predecessor is not translated. This is legal if
374 predecessor is dominating or in target block (already checked). */
375 trans = get_translated(pred, block);
377 DB((dbg, LEVEL_5, "unclean %+F because pred %+F unclean (not translated)\n", node, pred));
381 /* Node and predecessor are translated, but is pred clean?
382 The value of the translated predecessor has to be in antic_in. */
383 ir_node *value = identify(trans);
384 if (! ir_valueset_lookup(get_block_info(block)->antic_in, value)) {
385 DB((dbg, LEVEL_5, "unclean %+F because pred %+F value %+F not antic\n", node, pred, value));
390 assert(0 && "should have been catched");
395 } /* is_clean_in_block */
398 * Checks if a node n is clean in block block for exp_gen.
401 * @param block the block
402 * @return non-zero value for clean node
404 static int is_clean_in_block_expgen(ir_node *n, ir_node *block)
408 if (get_irn_mode(n) == mode_M)
414 if (! is_nice_value(n))
417 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
418 ir_node *pred = get_irn_n(n, i);
420 /* sufficient for exp_gen because block is always block of node */
421 if (get_nodes_block(pred) != block)
425 so it needs to be clean (already in exp_gen) */
426 if (! get_irn_link(pred)) {
427 DB((dbg, LEVEL_5, "unclean %+F because pred %+F unclean\n", n, pred));
434 } /* is_clean_in_block */
437 * Does blocklocal common subexpression elimination (CSE).
439 * @param irn the node
440 * @param ctx the environment
442 static void cse_walker(ir_node *irn, void *ctx)
444 ir_node *opt = identify_remember(irn);
448 DB((dbg, LEVEL_5, "CSE %+F to %+F\n", irn, opt));
454 * Bottom up walker that ensures that every block gets a block info.
456 * @param irn the node
457 * @param ctx the environment
459 static void block_info_walker(ir_node *irn, void *ctx)
462 pre_env *env = (pre_env*)ctx;
463 alloc_block_info(irn, env);
468 * Topological walker puts nodes in top-down topological order into exp_gen set.
470 * @param irn the node
471 * @param ctx the environment
473 static void topo_walker(ir_node *irn, void *ctx)
480 /* GVN step: remember the value */
481 value = remember(irn);
483 /* no need to put constants into the sets: they are always redundant */
484 if (! is_nice_value(irn) || is_irn_constlike(irn))
487 /* Do not put mode_T nodes info the sets, or PhiT will be created
488 (which are not allowed in Firm). Instead, put the Proj's here only. */
489 if (get_irn_mode(irn) == mode_T)
492 block = get_nodes_block(irn);
493 info = get_block_info(block);
495 if (is_clean_in_block_expgen(irn, block)) {
496 /* two expressions with same value in block;
497 should have been fixed by CSE pass */
498 assert(get_nodes_block(irn) == block &&
499 (! ir_valueset_lookup(info->exp_gen, value)));
501 DB((dbg, LEVEL_5, "%+F clean in block %+F\n", irn, block));
503 ir_valueset_insert(info->exp_gen, value, irn);
504 /* flag irn as clean*/
505 set_irn_link(irn, irn);
507 /* flag irn as not clean */
508 set_irn_link(irn, NULL);
513 * Computes Avail_out(block):
515 * Avail_in(block) = Avail_out(dom(block))
516 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
519 * This function must be called in the top-down topological order:
520 * Then it computes Leader(Nodes(block)) instead of Nodes(block) !
522 * @param block the block
523 * @param ctx walker context
525 static void compute_avail_top_down(ir_node *block, void *ctx)
527 pre_env *env = (pre_env*)ctx;
528 block_info *dom_info;
529 block_info *info = get_block_info(block);
532 /* filter blocks from topological walker */
533 if (! is_Block(block))
536 if (block == env->end_block)
539 /* First, add all nodes from the immediate dominator.
540 This ensures that avail_out contains the leader.
541 The start block has no immediate dominator. */
542 if (block != env->start_block) {
543 dom_block = get_Block_idom(block);
544 assert(is_Block(dom_block));
545 dom_info = get_block_info(dom_block);
547 value_union(info->avail_out, dom_info->avail_out);
549 /* Second, add values from exp_gen. */
550 value_union(info->avail_out, info->exp_gen);
552 dump_value_set(info->avail_out, "Avail_out", block);
556 * Translates an expression above a Phi.
558 * @param node the node
559 * @param block the block the node is translated into
560 * @param pos the input number of the destination block
562 * @return a node representing the translated value
564 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos)
570 ir_node *target_block;
573 if (get_nodes_block(node) == block) {
574 /* a Phi inside target block */
575 return get_Phi_pred(node, pos);
577 /* already outside */
581 arity = get_irn_arity(node);
582 in = XMALLOCN(ir_node *, arity);
584 for (i = 0; i < arity; ++i) {
585 ir_node *pred = get_irn_n(node, i);
586 ir_node *pred_block = get_Block_cfgpred_block(block,pos);
587 ir_node *trans = get_translated(pred, pred_block);
589 /* if node is topologically first in block then
590 there is no translated predecessor.
591 We do not check cleanliness here, so pred might be not clean. */
598 target_block = get_Block_cfgpred_block(block, pos);
600 /* Projections are the sole case where we have to ensure
601 that they are in the same block as their tuple node. */
602 target_block = get_nodes_block(in[0]);
606 get_irn_dbg_info(node),
614 /* We need the attribute copy here, because the Hash value of a
615 node might depend on that. */
616 copy_node_attr(get_irn_irg(node), node, nn);
617 DB((dbg, LEVEL_5, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
620 nn = optimize_node(nn);
621 DB((dbg, LEVEL_5, "New GCSE-optimized node %+F origin %+F\n", nn, node));
623 /* During the insert phase we need to compare the global value numbers
624 of blocks that do not dominate each other. 'Blocksafe' GCSE requires
625 the two equivalent nodes to be in blocks that dominate each other.
626 (see identities_cmp() in iropt.c)
627 If we do not translate a node into the predecessor block, their values
628 will not be considered equivalent. (we are at a merging block.)
629 So we have to translate a node into its predecessor block.
630 If we switched off blocksafety we will find matching values that are
631 not dominating (in loops) which we cannot use.
633 Also, blocksafe GCSE does not kill nn even if its value is already
634 present in the successor because the predecessor blocks do not dominate.
635 This is required for antic_in.
637 The nodes produced here are not necessarily in the designated block.
638 They are used to determine the value of node node.
639 If we use them for hoisting, we need to make sure that they are in the
640 designated block. fix_translated() does this job. */
643 } /* phi_translate */
646 * Block-walker, computes Antic_in(block).
648 * @param block the block
649 * @param ctx the walker environment
651 static void compute_antic(ir_node *block, void *ctx)
653 pre_env *env = (pre_env*)ctx;
654 block_info *succ_info;
660 ir_valueset_iterator_t iter;
662 /* filter blocks from topological walker */
663 if (! is_Block(block))
666 /* no need for computations in start block */
667 if (block == env->start_block)
670 /* the end block has no successor */
671 if (block == env->end_block)
674 info = get_block_info(block);
675 size = ir_valueset_size(info->antic_in);
677 /* This step puts all generated expression from the
678 current block into antic_in.
679 This is needs to be done in the first iteration only. */
680 if (env->first_iter) {
681 foreach_valueset(info->exp_gen, value, expr, iter) {
682 /* We will have phi nodes in antic in.
683 This should prevent special cases in several places. */
684 ir_valueset_insert(info->antic_in, value, expr);
688 /* TODO handle endless loops. */
690 int n_succ = get_Block_n_cfg_outs(block);
694 /* find blocks position in succ's block predecessors */
695 succ = get_Block_cfg_out(block, 0);
696 pos = get_Block_cfgpred_pos(succ, block);
699 succ_info = get_block_info(succ);
700 /* translate into list: we cannot insert into a set we iterate
701 * and succ might be equal to block for endless loops */
702 foreach_valueset(succ_info->antic_in, value, expr, iter) {
706 DB((dbg, LEVEL_5, "Begin phi translate antic: expr %+F from %+F to %d\n", expr, succ, pos));
708 /* TODO if successor block has 1 predecessor we need no phi translation.
709 But the clean_in_block check is still needed! */
710 /* TODO phi translation and clean in block are overlapping,
711 because phi trans perhaps should know in advance if predecessors are clean. */
712 trans = phi_translate(expr, succ, pos);
713 newval = remember(trans);
715 DB((dbg, LEVEL_5, "----> phi translate antic: expr %+F from %+F to %d is trans %+F\n", expr, succ, pos, trans));
717 if (is_clean_in_block_antic(trans, block)) {
718 if (! is_irn_constlike(trans)) {
719 ir_valueset_insert(info->antic_in, newval, trans);
721 DB((dbg, LEVEL_5, " translated %+F clean in %+F\n", trans, block));
724 DB((dbg, LEVEL_5, " translated %+F not clean in %+F\n", trans, block));
727 /* We have to set translated anyway
728 because expr might still be hoisted _into_ block. */
729 set_translated(expr, succ, pos, trans);
731 DB((dbg, LEVEL_5, "- end: expr %+F -----\n\n", expr));
734 } else if (n_succ > 1) {
736 block_info *succ0_info;
740 /* Select a successor to compute the disjoint of all nodes
741 sets, it might be useful to select the block with the
742 smallest number of nodes. For simplicity we choose the
744 succ0 = get_Block_cfg_out(block, 0);
745 succ0_info = get_block_info(succ0);
747 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
748 /* we need the disjoint */
749 for (i = 1; i < n_succ; ++i) {
750 ir_node *succ = get_Block_cfg_out(block, i);
751 block_info *succ_info = get_block_info(succ);
753 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL) {
759 /* we found a value that is common in all Antic_in(succ(b)),
760 put it in Antic_in(b) if the value is not already represented. */
761 if (common && is_clean_in_block_antic(expr, block)) {
762 ir_valueset_insert(info->antic_in, value, expr);
764 set_translated(expr, succ0, 0, expr);
769 dump_value_set(info->antic_in, "Antic_in", block);
770 if (size != ir_valueset_size(info->antic_in)) {
774 } /* compute_antic */
777 * Finds if the value of expr is a partially redundant value in block.
779 * @param block the block
780 * @param expr the expression
782 * @return mode of the expression if it is partially redundant else NULL
784 static ir_mode *find_partially_redundant(ir_node *block, ir_node *expr)
786 ir_node *first_avail = NULL;
788 int arity = get_irn_arity(block);
789 int fully_redundant = 1;
790 int partially_redundant = 0;
791 ir_mode *mode = NULL;
793 DB((dbg, LEVEL_3, "Examine expr %+F of %+F\n", expr, block));
795 /* for each predecessor blocks */
796 for (pos = 0; pos < arity; ++pos) {
797 block_info *pred_info;
798 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
800 ir_node *trans_value;
803 /* ignore bad blocks. */
804 if (is_Bad(pred_block))
807 trans_expr = get_translated(expr, get_Block_cfgpred_block(block,pos));
808 DB((dbg, LEVEL_2, "expr %+F trans @ %d is translated %+F\n", expr, pos, trans_expr));
809 /* exp in antic in, so pred is clean
810 uncover when it is not */
813 trans_value = identify(trans_expr);
814 DB((dbg, LEVEL_2, "trans_value %+F\n", trans_value));
817 pred_info = get_block_info(pred_block);
818 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
819 DB((dbg, LEVEL_2, "avail_expr %+F\n", avail_expr));
821 if (avail_expr == NULL) {
822 /* expr not available */
823 pred_info->avail = expr;
824 pred_info->found = 0;
828 /* expr is available */
829 pred_info->avail = avail_expr;
830 pred_info->found = 1;
831 mode = get_irn_mode(avail_expr);
832 partially_redundant = 1;
834 if (first_avail == NULL)
835 first_avail = avail_expr;
836 else if (first_avail != avail_expr)
837 /* Multiple different expressions are available */
840 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
844 /* If it is not the same value already existing along every predecessor
845 and it is defined by some predecessor then it is partially redundant. */
846 if (! fully_redundant && partially_redundant)
853 * Copies node and its predecessors to a block that dominates the target block.
855 * @param node the node
856 * @param target the target block
858 * @return copy of node node dominating target block
860 static ir_node *fix_translation(ir_node *node, ir_node *target)
867 DB((dbg, LEVEL_1, "Fix_translation %+F into %+F\n", node, target));
869 /* identifies unreachable blocks using domination */
870 if (get_Block_dom_depth(get_nodes_block(node)) < 0 ||
871 (get_Block_dom_depth(target) < 0))
872 return new_r_Bad(get_irn_irg(node), get_irn_mode(node));
874 /* Walk upwards until the node dominates its use in target block.
875 Precondition is that the node is clean. */
876 if (block_dominates(get_nodes_block(node), target))
879 DB((dbg, LEVEL_1, "Fix_translation%+F of node %+F does not dominate target %+F\n", get_nodes_block(node), node, target));
881 arity = get_irn_arity(node);
882 ins = XMALLOCN(ir_node*, arity);
884 for (i = arity - 1; i >= 0; --i) {
885 ir_node *pred = get_irn_n(node, i);
886 ir_node *fixed = fix_translation(pred, target);
888 DB((dbg, LEVEL_1, "Fixed %+F to %+F for node %+F\n", pred, fixed, node));
893 get_irn_dbg_info(node),
901 copy_node_attr(get_irn_irg(node), node, nn);
903 DB((dbg, LEVEL_1, "New fixed node %+F from translated %+F. target %+F\n", nn, node, target));
905 nn = optimize_node(nn);
908 } /* fix_translation */
911 * Updates the new_set of a block by adding the new_set of
912 * the immediate dominating block.
916 static void update_new_set(ir_node *block, ir_node *idom)
920 ir_valueset_iterator_t iter;
921 block_info *curr_info = get_block_info(block);
922 block_info *idom_info = get_block_info(idom);
925 dump_value_set(idom_info->new_set, "[New Set]", idom);
926 foreach_valueset(idom_info->new_set, value, expr, iter) {
927 ir_valueset_insert(curr_info->new_set, value, expr);
928 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
931 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
933 } /* update_new_set */
936 * Perform insertion of partially redundant values.
937 * For every block node, do the following:
938 * 1. Propagate the NEW_SETS of the dominator into the current block.
939 * If the block has multiple predecessors,
940 * 2a. Iterate over the ANTIC expressions for the block to see if
941 * any of them are partially redundant.
942 * 2b. If so, insert them into the necessary predecessors to make
943 * the expression fully redundant.
944 * 2c. Insert a new Phi merging the values of the predecessors.
945 * 2d. Insert the new Phi, and the new expressions, into the
948 * @param block the block
949 * @param ctx the walker environment
951 static void insert_nodes(ir_node *block, void *ctx)
953 pre_env *env = (pre_env*)ctx;
957 block_info *curr_info;
959 int arity = get_irn_arity(block);
960 ir_valueset_iterator_t iter;
962 /* filter only blocks */
963 if (! is_Block(block))
966 /* ensure that even the start block has a new_set */
967 curr_info = get_block_info(block);
968 if (curr_info->new_set)
969 ir_valueset_del(curr_info->new_set);
970 curr_info->new_set = ir_valueset_new(16);
972 if (block == env->start_block)
975 DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
977 idom = get_Block_idom(block);
978 update_new_set(block, idom);
980 /* process only merge blocks */
984 /* for each antic_in */
985 foreach_valueset(curr_info->antic_in, value, expr, iter) {
991 /* filter phi nodes from antic in */
995 /* A value computed in the dominator is totally redundant.
996 Hence we have nothing to insert. */
997 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
998 DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1002 mode = find_partially_redundant(block, expr);
1006 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
1008 phi_in = XMALLOCN(ir_node *, arity);
1010 /* for all predecessor blocks */
1011 for (pos = 0; pos < arity; ++pos) {
1012 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1013 block_info *pred_info;
1015 /* ignore bad blocks. */
1016 if (is_Bad(pred_block)) {
1017 ir_graph *irg = get_irn_irg(pred_block);
1018 phi_in[pos] = new_r_Bad(irg, mode);
1021 pred_info = get_block_info(pred_block);
1023 /* ignore blocks that already have the expression */
1024 if (! pred_info->found) {
1025 ir_node *translated = get_translated(expr, pred_block);
1026 ir_node *trans_value;
1028 /* make sure translated dominates its use */
1029 translated = fix_translation(translated, pred_block);
1030 DB((dbg, LEVEL_3, "Use translated %+F in %+F because expr %+F not available\n", translated, pred_block, expr));
1032 /* make the new node available */
1033 trans_value = remember(translated);
1034 ir_valueset_insert(pred_info->avail_out, trans_value, translated);
1035 phi_in[pos] = translated;
1036 DB((dbg, LEVEL_5, "phi_in %+F\n", translated));
1038 phi_in[pos] = pred_info->avail;
1039 DB((dbg, LEVEL_5, "phi_in %+F\n", pred_info->avail));
1044 phi = new_r_Phi(block, arity, phi_in, mode);
1046 DB((dbg, LEVEL_1, "New %+F for redundant %+F created\n", phi, expr));
1048 phi_value = remember(phi);
1050 /* this 'global' value is now available through the new phi */
1051 ir_valueset_replace(curr_info->avail_out, value, phi);
1052 /* add this phi and its 'blocklocal' value */
1053 ir_valueset_insert(curr_info->avail_out, phi_value, phi);
1055 ir_valueset_insert(curr_info->new_set, value, phi);
1056 ir_valueset_insert(curr_info->new_set, phi_value, phi);
1058 /* remove from antic_in to prevent reprocessing */
1059 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
1063 } /* node_set_foreach */
1064 } /* insert_nodes */
1067 * Walker which finds redundant nodes using avail_out sets
1068 * and exchanges them for existing ones.
1069 * We cannot change the graph here as this would affect
1070 * the hash values of the nodes.
1072 * @param irn the node
1073 * @param ctx the walker environment
1075 static void eliminate(ir_node *irn, void *ctx)
1077 pre_env *env = (pre_env*)ctx;
1079 if (! is_Block(irn)) {
1080 ir_node *block = get_nodes_block(irn);
1081 block_info *bl = get_block_info(block);
1082 ir_node *value = identify(irn);
1084 if (value != NULL) {
1085 ir_node *expr = (ir_node*)ir_valueset_lookup(bl->avail_out, value);
1087 if (expr != NULL && expr != irn) {
1088 elim_pair *p = OALLOC(env->obst, elim_pair);
1092 p->next = env->pairs;
1093 if (get_irn_idx(expr) >= env->last_idx)
1094 p->reason = FS_OPT_GVN_PARTLY;
1096 p->reason = FS_OPT_GVN_FULLY;
1104 * Do all the recorded changes and optimize
1105 * newly created Phi's.
1107 * @param pairs list of elimination pairs
1109 static void eliminate_nodes(elim_pair *pairs)
1113 for (p = pairs; p != NULL; p = p->next) {
1114 /* might be already changed */
1115 p->new_node = skip_Id(p->new_node);
1117 DB((dbg, LEVEL_1, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1118 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1119 * which we can optimize here */
1120 if (is_Phi(p->new_node)) {
1122 ir_node *res = NULL;
1124 for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1125 ir_node *pred = get_irn_n(p->new_node, i);
1127 if (pred != p->old_node) {
1136 exchange(p->new_node, res);
1140 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1141 exchange(p->old_node, p->new_node);
1143 } /* eliminate_nodes */
1146 * Gvn_Pre pass for graph irg.
1148 * @param irg the graph
1150 void do_gvn_pre(ir_graph *irg)
1152 struct obstack obst;
1154 optimization_state_t state;
1155 block_info *bl_info;
1156 unsigned antic_iter;
1157 unsigned insert_iter;
1159 assure_irg_properties(irg,
1160 IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1161 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1162 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
1163 | IR_GRAPH_PROPERTY_CONSISTENT_POSTDOMINANCE);
1165 /* register a debug mask */
1166 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1167 /* edges will crash if enabled due to our allocate on other obstack trick */
1168 edges_deactivate(irg);
1170 save_optimization_state(&state);
1173 * If there are two nodes with the same value in one block,
1174 * the exp_gen valueset can only contain one of them. */
1175 set_opt_global_cse(0);
1176 new_identities(irg);
1177 irg_walk_graph(irg, NULL, cse_walker, &a_env);
1179 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1181 /* Switch on GCSE. We need it to correctly compute
1182 the value of a node, which is independent from
1184 set_opt_global_cse(1);
1185 new_identities(irg);
1187 /* setup environment */
1188 obstack_init(&obst);
1191 a_env.start_block = get_irg_start_block(irg);
1192 a_env.end_block = get_irg_end_block(irg);
1195 /* allocate block info */
1196 irg_walk_blkwise_graph(irg, block_info_walker, NULL, &a_env);
1198 /* generate exp_gen */
1199 irg_walk_blkwise_dom_top_down(irg, NULL, topo_walker, &a_env);
1200 dump_all_expgen_sets(a_env.list);
1202 /* compute the avail_out sets for all blocks */
1203 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
1205 /* compute the anticipated value sets for all blocks */
1207 a_env.first_iter = 1;
1209 /* antic_in passes */
1211 DB((dbg, LEVEL_1, "= Antic_in Iteration %d ========================\n",
1214 irg_walk_blkwise_graph(irg, compute_antic, NULL, &a_env);
1215 a_env.first_iter = 0;
1216 DB((dbg, LEVEL_1, "----------------------------------------------\n"));
1217 /* TODO bad endless loop protection */
1218 } while (a_env.changes != 0 && antic_iter < 40);
1220 /* compute redundant expressions */
1222 a_env.last_idx = get_irg_last_idx(irg);
1225 DB((dbg, LEVEL_1, "= Insert Iteration %d ==========================\n", insert_iter));
1227 /* TODO topologically top down would be better; fewer iterations. */
1228 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
1229 DB((dbg, LEVEL_1, "----------------------------------------------\n"));
1230 } while (a_env.changes != 0);
1232 /* last step: eliminate nodes */
1233 irg_walk_graph(irg, NULL, eliminate, &a_env);
1234 eliminate_nodes(a_env.pairs);
1236 /* clean up: delete all sets */
1237 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
1238 ir_valueset_del(bl_info->exp_gen);
1239 ir_valueset_del(bl_info->avail_out);
1240 ir_valueset_del(bl_info->antic_in);
1241 ir_nodehashmap_destroy(bl_info->trans);
1242 free(bl_info->trans);
1243 if (bl_info->new_set)
1244 ir_valueset_del(bl_info->new_set);
1247 obstack_free(&obst, NULL);
1249 /* pin the graph again.
1250 This is needed due to the use of set_opt_global_cse(1) */
1251 set_irg_pinned(irg, op_pin_state_pinned);
1252 restore_optimization_state(&state);
1254 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
1257 /* Creates an ir_graph pass for do_gvn_pre. */
1258 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
1260 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);