2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
45 #include "irgraph_t.h"
49 /** Additional info we need for every block. */
50 typedef struct block_info {
51 ir_valueset_t *exp_gen; /**< contains this blocks clean expressions */
52 ir_valueset_t *avail_out; /**< The Avail_out set for a block. */
53 ir_valueset_t *antic_in; /**< The Antic_in set for a block. */
54 ir_valueset_t *new_set; /**< The set of all new values for a block. */
55 ir_nodehashmap_t *trans; /**< contains translated nodes in block */
56 ir_node *avail; /**< The get_map(avail, block) result. */
57 ir_node *block; /**< The Block of the block info. */
58 struct block_info *next; /**< Links all entries, so we can recover the sets easily. */
59 int found; /**< Non-zero, if avail was found in this block. */
63 * A pair of nodes that must be exchanged.
64 * We must defer the exchange because our hash-sets cannot
65 * find an already replace node else.
67 typedef struct elim_pair {
68 ir_node *old_node; /**< The old node that will be replaced. */
69 ir_node *new_node; /**< The new node. */
70 struct elim_pair *next; /**< Links all entries in a list. */
71 int reason; /**< The reason for the replacement. */
74 /** The environment for the GVN-PRE algorithm */
75 typedef struct pre_env {
76 struct obstack *obst; /**< The obstack to allocate on. */
77 ir_node *start_block; /**< The start block of the current graph. */
78 ir_node *end_block; /**< The end block of the current graph */
79 block_info *list; /**< Links all block info entries for easier recovery. */
80 elim_pair *pairs; /**< A list of node pairs that must be eliminated. */
81 unsigned last_idx; /**< last node index of "old" nodes, all higher indexes are newly created once. */
82 char changes; /**< Non-zero, if calculation of Antic_in has changed. */
83 char first_iter; /**< non-zero for first iteration */
86 /** The debug module handle. */
87 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
89 /* ---------- Functions for Value sets ---------- */
93 * computes dst = dst \/ src for value sets
95 * @param dst the union result set
96 * @param src the source set
98 static void value_union(ir_valueset_t *dst, ir_valueset_t *src)
100 ir_valueset_iterator_t iter;
104 foreach_valueset(src, value, expr, iter) {
105 /* dominator tree walk; use first available expr as leader */
106 ir_valueset_insert(dst, value, expr);
111 /* ---------- Functions for Values ---------- */
114 * Remember adds a node e to the GCSE valuetable.
116 * @param e a node representing an expression
117 * @return the final value for the expression e
119 static ir_node *remember(ir_node *e)
124 ir_node *pred = get_Proj_pred(e);
125 ir_node *v_pred = identify_remember(pred);
127 if (v_pred != pred) {
128 ir_node *proj = new_r_Proj(v_pred, get_irn_mode(e), get_Proj_proj(e));
129 value = identify_remember(proj);
134 value = identify_remember(e);
139 * Identify does a lookup in the GCSE valuetable.
141 * @param e a node representing an expression
142 * @return a node representing the value or NULL if no identified
144 static ir_node *identify(ir_node *e)
146 return identify_remember(e);
150 * Returns the block info of a block.
152 * @param block the block
153 * @return block info of block
155 static block_info *get_block_info(ir_node *block)
157 return (block_info*)get_irn_link(block);
161 * Allocate block info for block block.
163 * @param block the block
164 * @param env the environment
166 static void alloc_block_info(ir_node *block, pre_env *env)
168 block_info *info = OALLOC(env->obst, block_info);
170 set_irn_link(block, info);
171 info->exp_gen = ir_valueset_new(16);
172 info->avail_out = ir_valueset_new(16);
173 info->antic_in = ir_valueset_new(16);
174 /* valueset has much nicer interface */
175 info->trans = XMALLOC(ir_nodehashmap_t);
176 ir_nodehashmap_init(info->trans);
178 info->new_set = NULL;
183 info->next = env->list;
185 } /* alloc_block_info */
188 * Returns non-zero if a node is movable and a possible candidate for PRE.
191 * @return non-zero if value is nice
193 static int is_nice_value(ir_node *n)
195 ir_mode *mode = get_irn_mode(n);
204 n = get_Proj_pred(n);
206 /* we may not move pinned nodes */
207 if (get_irn_pinned(n) == op_pin_state_pinned)
210 if (!mode_is_data(mode)) {
211 /* Div and Mod are only nice if they do not use memory. */
212 if (! is_Div(n) && ! is_Mod(n))
214 if (! is_NoMem(get_memop_mem(n)))
218 } /* is_nice_value */
225 * @param set the set to dump
226 * @param txt a text to describe the set
227 * @param block the owner block of the set
229 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
231 ir_valueset_iterator_t iter;
236 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
237 foreach_valueset(set, value, expr, iter) {
239 DB((dbg, LEVEL_2, "\n"));
241 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
243 DB((dbg, LEVEL_2, " %+F,", expr));
246 DB((dbg, LEVEL_2, "\n}\n"));
247 } /* dump_value_set */
250 * Dump all exp_gen value sets.
252 * @param list the list of block infos to retrieve the sets from
254 static void dump_all_expgen_sets(block_info *list)
258 for (bl_info = list; bl_info != NULL; bl_info = bl_info->next) {
259 dump_value_set(bl_info->exp_gen, "[Exp_gen]", bl_info->block);
264 #define dump_value_set(set, txt, block)
265 #define dump_all_expgen_sets(list)
266 #endif /* DEBUG_libfirm */
269 * Gets result of nodes phi translation into block.
271 * @param node the node
272 * @param block the target block
274 * @return a phi translation of node node into block block or NULL
276 static ir_node *get_translated(ir_node *node, ir_node *block)
281 if (is_irn_constlike(node))
284 bi = get_block_info(block);
285 trans = ir_nodehashmap_get(ir_node, bi->trans, node);
290 * Saves result of phi translation of node into predecessor
291 * at pos of block succ.
293 * @param node the node
294 * @param succ the successor of the translation target block
295 * @param pos the position of the predecessor block
296 * @param trans the translation result
299 static void set_translated(ir_node *node, ir_node *succ, int pos, ir_node *trans)
301 ir_node *pred = get_Block_cfgpred_block(succ, pos);
302 block_info *bi = get_block_info(pred);
304 ir_nodehashmap_insert(bi->trans, node, trans);
308 * Checks if a node node is clean in block block for use in antic_in.
310 * A clean node in block block can be hoisted above block block.
311 * A node is not clean if its value is killed in block block.
312 * The node can still be hoisted into block block.
314 * @param n the phi translated or not translated node
315 * @param block the block
316 * @return non-zero value for clean node
318 static int is_clean_in_block_antic(ir_node *node, ir_node *block)
322 if (get_irn_mode(node) == mode_M)
325 /* a phi only has predecessors in other blocks */
329 /* constants are in start block */
330 if (is_irn_constlike(node))
333 /* what we really want to check
334 Only for node is translated case; other are clean anyway */
335 if (! is_nice_value(node)) {
339 /* cleanliness depends on nodes predecessors
340 At least if node is translated. */
341 for (i = get_irn_arity(node) - 1; i >= 0; --i) {
342 ir_node *pred = get_irn_n(node, i);
346 if (is_irn_constlike(pred))
349 /* exp_gen only contains clean nodes */
350 if (ir_valueset_lookup(get_block_info(block)->exp_gen, pred))
353 /* block of pred strictly dominates target block. pred irrelevant. */
354 if (block_strictly_dominates(get_nodes_block(pred), block))
357 /* --- pred neither in block, nor dominating -- */
359 /* This pred is in antic_in and such clean.
360 Not every clean pred is in antic_in though.
361 Predecessor might be translated or not */
362 value = identify(pred);
363 if (ir_valueset_lookup(get_block_info(block)->antic_in, value))
366 /* This check is not redundant for translated nodes;
367 non translated ones are already nice. */
368 if (! is_nice_value(pred)) {
369 DB((dbg, LEVEL_5, "unclean %+F because pred %+F not nice\n", node, pred));
373 /* predecessor is not translated. This is legal if
374 predecessor is dominating or in target block (already checked). */
375 trans = get_translated(pred, block);
377 DB((dbg, LEVEL_5, "unclean %+F because pred %+F unclean (not translated)\n", node, pred));
381 /* Node and predecessor are translated, but is pred clean?
382 The value of the translated predecessor has to be in antic_in. */
383 ir_node *value = identify(trans);
384 if (! ir_valueset_lookup(get_block_info(block)->antic_in, value)) {
385 DB((dbg, LEVEL_5, "unclean %+F because pred %+F value %+F not antic\n", node, pred, value));
390 assert(0 && "should have been catched");
395 } /* is_clean_in_block */
398 * Checks if a node n is clean in block block for exp_gen.
401 * @param block the block
402 * @return non-zero value for clean node
404 static int is_clean_in_block_expgen(ir_node *n, ir_node *block)
408 if (get_irn_mode(n) == mode_M)
414 if (! is_nice_value(n))
417 for (i = get_irn_arity(n) - 1; i >= 0; --i) {
418 ir_node *pred = get_irn_n(n, i);
420 /* sufficient for exp_gen because block is always block of node */
421 if (get_nodes_block(pred) != block)
425 so it needs to be clean (already in exp_gen) */
426 if (! get_irn_link(pred)) {
427 DB((dbg, LEVEL_5, "unclean %+F because pred %+F unclean\n", n, pred));
434 } /* is_clean_in_block */
437 * Does blocklocal common subexpression elimination (CSE).
439 * @param irn the node
440 * @param ctx the environment
442 static void cse_walker(ir_node *irn, void *ctx)
444 ir_node *opt = identify_remember(irn);
448 DB((dbg, LEVEL_5, "CSE %+F to %+F\n", irn, opt));
454 * Bottom up walker that ensures that every block gets a block info.
456 * @param irn the node
457 * @param ctx the environment
459 static void block_info_walker(ir_node *irn, void *ctx)
462 pre_env *env = (pre_env*)ctx;
463 alloc_block_info(irn, env);
468 * Topological walker puts nodes in top-down topological order into exp_gen set.
470 * @param irn the node
471 * @param ctx the environment
473 static void topo_walker(ir_node *irn, void *ctx)
480 /* GVN step: remember the value */
481 value = remember(irn);
483 /* no need to put constants into the sets: they are always redundant */
484 if (! is_nice_value(irn) || is_irn_constlike(irn))
487 /* Do not put mode_T nodes info the sets, or PhiT will be created
488 (which are not allowed in Firm). Instead, put the Proj's here only. */
489 if (get_irn_mode(irn) == mode_T)
492 block = get_nodes_block(irn);
493 info = get_block_info(block);
495 if (is_clean_in_block_expgen(irn, block)) {
496 /* two expressions with same value in block;
497 should have been fixed by CSE pass */
498 assert(get_nodes_block(irn) == block &&
499 (! ir_valueset_lookup(info->exp_gen, value)));
501 DB((dbg, LEVEL_5, "%+F clean in block %+F\n", irn, block));
503 ir_valueset_insert(info->exp_gen, value, irn);
504 /* flag irn as clean*/
505 set_irn_link(irn, irn);
507 /* flag irn as not clean */
508 set_irn_link(irn, NULL);
513 * Computes Avail_out(block):
515 * Avail_in(block) = Avail_out(dom(block))
516 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
519 * This function must be called in the top-down topological order:
520 * Then it computes Leader(Nodes(block)) instead of Nodes(block) !
522 * @param block the block
523 * @param ctx walker context
525 static void compute_avail_top_down(ir_node *block, void *ctx)
527 pre_env *env = (pre_env*)ctx;
528 block_info *dom_info;
529 block_info *info = get_block_info(block);
532 /* filter blocks from topological walker */
533 if (! is_Block(block))
536 if (block == env->end_block)
539 /* First, add all nodes from the immediate dominator.
540 This ensures that avail_out contains the leader.
541 The start block has no immediate dominator. */
542 if (block != env->start_block) {
543 dom_block = get_Block_idom(block);
544 assert(is_Block(dom_block));
545 dom_info = get_block_info(dom_block);
547 value_union(info->avail_out, dom_info->avail_out);
549 /* Second, add values from exp_gen. */
550 value_union(info->avail_out, info->exp_gen);
552 dump_value_set(info->avail_out, "Avail_out", block);
556 * Translates an expression above a Phi.
558 * @param node the node
559 * @param block the block the node is translated into
560 * @param pos the input number of the destination block
562 * @return a node representing the translated value
564 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos)
572 if (get_nodes_block(node) == block) {
573 /* a Phi inside target block */
574 return get_Phi_pred(node, pos);
576 /* already outside */
580 arity = get_irn_arity(node);
581 in = XMALLOCN(ir_node *, arity);
583 for (i = 0; i < arity; ++i) {
584 ir_node *pred = get_irn_n(node, i);
585 ir_node *pred_block = get_Block_cfgpred_block(block,pos);
586 ir_node *trans = get_translated(pred, pred_block);
588 /* if node is topologically first in block then
589 there is no translated predecessor.
590 We do not check cleanliness here, so pred might be not clean. */
598 get_irn_dbg_info(node),
600 get_Block_cfgpred_block(block, pos),
606 /* We need the attribute copy here, because the Hash value of a
607 node might depend on that. */
608 copy_node_attr(get_irn_irg(node), node, nn);
609 DB((dbg, LEVEL_5, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
612 nn = optimize_node(nn);
613 DB((dbg, LEVEL_5, "New GCSE-optimized node %+F origin %+F\n", nn, node));
615 /* During the insert phase we need to compare the global value numbers
616 of blocks that do not dominate each other. 'Blocksafe' GCSE requires
617 the two equivalent nodes to be in blocks that dominate each other.
618 (see identities_cmp() in iropt.c)
619 If we do not translate a node into the predecessor block, their values
620 will not be considered equivalent. (we are at a merging block.)
621 So we have to translate a node into its predecessor block.
622 If we switched off blocksafety we will find matching values that are
623 not dominating (in loops) which we cannot use.
625 Also, blocksafe GCSE does not kill nn even if its value is already
626 present in the successor because the predecessor blocks do not dominate.
627 This is required for antic_in.
629 The nodes produced here are not necessarily in the designated block.
630 They are used to determine the value of node node.
631 If we use them for hoisting, we need to make sure that they are in the
632 designated block. fix_translated() does this job. */
635 } /* phi_translate */
638 * Block-walker, computes Antic_in(block).
640 * @param block the block
641 * @param ctx the walker environment
643 static void compute_antic(ir_node *block, void *ctx)
645 pre_env *env = (pre_env*)ctx;
646 block_info *succ_info;
652 ir_valueset_iterator_t iter;
654 /* filter blocks from topological walker */
655 if (! is_Block(block))
658 /* no need for computations in start block */
659 if (block == env->start_block)
662 /* the end block has no successor */
663 if (block == env->end_block)
666 info = get_block_info(block);
667 size = ir_valueset_size(info->antic_in);
669 /* This step puts all generated expression from the
670 current block into antic_in.
671 This is needs to be done in the first iteration only. */
672 if (env->first_iter) {
673 foreach_valueset(info->exp_gen, value, expr, iter) {
674 /* We will have phi nodes in antic in.
675 This should prevent special cases in several places. */
676 ir_valueset_insert(info->antic_in, value, expr);
680 /* TODO handle endless loops. */
682 int n_succ = get_Block_n_cfg_outs(block);
686 /* find blocks position in succ's block predecessors */
687 succ = get_Block_cfg_out(block, 0);
688 pos = get_Block_cfgpred_pos(succ, block);
691 succ_info = get_block_info(succ);
692 /* translate into list: we cannot insert into a set we iterate
693 * and succ might be equal to block for endless loops */
694 foreach_valueset(succ_info->antic_in, value, expr, iter) {
698 DB((dbg, LEVEL_5, "Begin phi translate antic: expr %+F from %+F to %d\n", expr, succ, pos));
700 /* TODO if successor block has 1 predecessor we need no phi translation.
701 But the clean_in_block check is still needed! */
702 /* TODO phi translation and clean in block are overlapping,
703 because phi trans perhaps should know in advance if predecessors are clean. */
704 trans = phi_translate(expr, succ, pos);
705 newval = remember(trans);
707 DB((dbg, LEVEL_5, "----> phi translate antic: expr %+F from %+F to %d is trans %+F\n", expr, succ, pos, trans));
709 if (is_clean_in_block_antic(trans, block)) {
710 if (! is_irn_constlike(trans)) {
711 ir_valueset_insert(info->antic_in, newval, trans);
713 DB((dbg, LEVEL_5, " translated %+F clean in %+F\n", trans, block));
716 DB((dbg, LEVEL_5, " translated %+F not clean in %+F\n", trans, block));
719 /* We have to set translated anyway
720 because expr might still be hoisted _into_ block. */
721 set_translated(expr, succ, pos, trans);
723 DB((dbg, LEVEL_5, "- end: expr %+F -----\n\n", expr));
726 } else if (n_succ > 1) {
728 block_info *succ0_info;
732 /* Select a successor to compute the disjoint of all nodes
733 sets, it might be useful to select the block with the
734 smallest number of nodes. For simplicity we choose the
736 succ0 = get_Block_cfg_out(block, 0);
737 succ0_info = get_block_info(succ0);
739 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
740 /* we need the disjoint */
741 for (i = 1; i < n_succ; ++i) {
742 ir_node *succ = get_Block_cfg_out(block, i);
743 block_info *succ_info = get_block_info(succ);
745 if (ir_valueset_lookup(succ_info->antic_in, value) == NULL) {
751 /* we found a value that is common in all Antic_in(succ(b)),
752 put it in Antic_in(b) if the value is not already represented. */
753 if (common && is_clean_in_block_antic(expr, block)) {
754 ir_valueset_insert(info->antic_in, value, expr);
756 set_translated(expr, succ0, 0, expr);
761 dump_value_set(info->antic_in, "Antic_in", block);
762 if (size != ir_valueset_size(info->antic_in)) {
766 } /* compute_antic */
769 * Finds if the value of expr is a partially redundant value in block.
771 * @param block the block
772 * @param expr the expression
774 * @return mode of the expression if it is partially redundant else NULL
776 static ir_mode *find_partially_redundant(ir_node *block, ir_node *expr)
778 ir_node *first_avail = NULL;
780 int arity = get_irn_arity(block);
781 int fully_redundant = 1;
782 int partially_redundant = 0;
783 ir_mode *mode = NULL;
785 DB((dbg, LEVEL_3, "Examine expr %+F of %+F\n", expr, block));
787 /* for each predecessor blocks */
788 for (pos = 0; pos < arity; ++pos) {
789 block_info *pred_info;
790 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
792 ir_node *trans_value;
795 /* ignore bad blocks. */
796 if (is_Bad(pred_block))
799 trans_expr = get_translated(expr, get_Block_cfgpred_block(block,pos));
800 DB((dbg, LEVEL_2, "expr %+F trans @ %d is translated %+F\n", expr, pos, trans_expr));
801 /* exp in antic in, so pred is clean
802 uncover when it is not */
805 trans_value = identify(trans_expr);
806 DB((dbg, LEVEL_2, "trans_value %+F\n", trans_value));
809 pred_info = get_block_info(pred_block);
810 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
811 DB((dbg, LEVEL_2, "avail_expr %+F\n", avail_expr));
813 if (avail_expr == NULL) {
814 /* expr not available */
815 pred_info->avail = expr;
816 pred_info->found = 0;
820 /* expr is available */
821 pred_info->avail = avail_expr;
822 pred_info->found = 1;
823 mode = get_irn_mode(avail_expr);
824 partially_redundant = 1;
826 if (first_avail == NULL)
827 first_avail = avail_expr;
828 else if (first_avail != avail_expr)
829 /* Multiple different expressions are available */
832 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
836 /* If it is not the same value already existing along every predecessor
837 and it is defined by some predecessor then it is partially redundant. */
838 if (! fully_redundant && partially_redundant)
845 * Copies node and its predecessors to a block that dominates the target block.
847 * @param node the node
848 * @param target the target block
850 * @return copy of node node dominating target block
852 static ir_node *fix_translation(ir_node *node, ir_node *target)
859 DB((dbg, LEVEL_1, "Fix_translation %+F into %+F\n", node, target));
861 /* identifies unreachable blocks using domination */
862 if (get_Block_dom_depth(get_nodes_block(node)) < 0 ||
863 (get_Block_dom_depth(target) < 0))
864 return new_r_Bad(get_irn_irg(node), get_irn_mode(node));
866 /* Walk upwards until the node dominates its use in target block.
867 Precondition is that the node is clean. */
868 if (block_dominates(get_nodes_block(node), target))
871 DB((dbg, LEVEL_1, "Fix_translation%+F of node %+F does not dominate target %+F\n", get_nodes_block(node), node, target));
873 arity = get_irn_arity(node);
874 ins = XMALLOCN(ir_node*, arity);
876 for (i = arity - 1; i >= 0; --i) {
877 ir_node *pred = get_irn_n(node, i);
878 ir_node *fixed = fix_translation(pred, target);
880 DB((dbg, LEVEL_1, "Fixed %+F to %+F for node %+F\n", pred, fixed, node));
885 get_irn_dbg_info(node),
893 copy_node_attr(get_irn_irg(node), node, nn);
895 DB((dbg, LEVEL_1, "New fixed node %+F from translated %+F. target %+F\n", nn, node, target));
897 nn = optimize_node(nn);
900 } /* fix_translation */
903 * Updates the new_set of a block by adding the new_set of
904 * the immediate dominating block.
908 static void update_new_set(ir_node *block, ir_node *idom)
912 ir_valueset_iterator_t iter;
913 block_info *curr_info = get_block_info(block);
914 block_info *idom_info = get_block_info(idom);
917 dump_value_set(idom_info->new_set, "[New Set]", idom);
918 foreach_valueset(idom_info->new_set, value, expr, iter) {
919 ir_valueset_insert(curr_info->new_set, value, expr);
920 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
923 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
925 } /* update_new_set */
928 * Perform insertion of partially redundant values.
929 * For every block node, do the following:
930 * 1. Propagate the NEW_SETS of the dominator into the current block.
931 * If the block has multiple predecessors,
932 * 2a. Iterate over the ANTIC expressions for the block to see if
933 * any of them are partially redundant.
934 * 2b. If so, insert them into the necessary predecessors to make
935 * the expression fully redundant.
936 * 2c. Insert a new Phi merging the values of the predecessors.
937 * 2d. Insert the new Phi, and the new expressions, into the
940 * @param block the block
941 * @param ctx the walker environment
943 static void insert_nodes(ir_node *block, void *ctx)
945 pre_env *env = (pre_env*)ctx;
949 block_info *curr_info;
951 int arity = get_irn_arity(block);
952 ir_valueset_iterator_t iter;
954 /* filter only blocks */
955 if (! is_Block(block))
958 /* ensure that even the start block has a new_set */
959 curr_info = get_block_info(block);
960 if (curr_info->new_set)
961 ir_valueset_del(curr_info->new_set);
962 curr_info->new_set = ir_valueset_new(16);
964 if (block == env->start_block)
967 DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
969 idom = get_Block_idom(block);
970 update_new_set(block, idom);
972 /* process only merge blocks */
976 /* for each antic_in */
977 foreach_valueset(curr_info->antic_in, value, expr, iter) {
983 /* filter phi nodes from antic in */
987 /* A value computed in the dominator is totally redundant.
988 Hence we have nothing to insert. */
989 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
990 DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
994 mode = find_partially_redundant(block, expr);
998 DB((dbg, LEVEL_1, "Partial redundant %+F from block %+F found\n", expr, block));
1000 phi_in = XMALLOCN(ir_node *, arity);
1002 /* for all predecessor blocks */
1003 for (pos = 0; pos < arity; ++pos) {
1004 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1005 block_info *pred_info;
1007 /* ignore bad blocks. */
1008 if (is_Bad(pred_block)) {
1009 ir_graph *irg = get_irn_irg(pred_block);
1010 phi_in[pos] = new_r_Bad(irg, mode);
1013 pred_info = get_block_info(pred_block);
1015 /* ignore blocks that already have the expression */
1016 if (! pred_info->found) {
1017 ir_node *translated = get_translated(expr, pred_block);
1018 ir_node *trans_value;
1020 /* make sure translated dominates its use */
1021 translated = fix_translation(translated, pred_block);
1022 DB((dbg, LEVEL_3, "Use translated %+F in %+F because expr %+F not available\n", translated, pred_block, expr));
1024 /* make the new node available */
1025 trans_value = remember(translated);
1026 ir_valueset_insert(pred_info->avail_out, trans_value, translated);
1027 phi_in[pos] = translated;
1028 DB((dbg, LEVEL_5, "phi_in %+F\n", translated));
1030 phi_in[pos] = pred_info->avail;
1031 DB((dbg, LEVEL_5, "phi_in %+F\n", pred_info->avail));
1036 phi = new_r_Phi(block, arity, phi_in, mode);
1038 DB((dbg, LEVEL_1, "New %+F for redundant %+F created\n", phi, expr));
1040 phi_value = remember(phi);
1042 /* this 'global' value is now available through the new phi */
1043 ir_valueset_replace(curr_info->avail_out, value, phi);
1044 /* add this phi and its 'blocklocal' value */
1045 ir_valueset_insert(curr_info->avail_out, phi_value, phi);
1047 ir_valueset_insert(curr_info->new_set, value, phi);
1048 ir_valueset_insert(curr_info->new_set, phi_value, phi);
1050 /* remove from antic_in to prevent reprocessing */
1051 ir_valueset_remove_iterator(curr_info->antic_in, &iter);
1055 } /* node_set_foreach */
1056 } /* insert_nodes */
1059 * Walker which finds redundant nodes using avail_out sets
1060 * and exchanges them for existing ones.
1061 * We cannot change the graph here as this would affect
1062 * the hash values of the nodes.
1064 * @param irn the node
1065 * @param ctx the walker environment
1067 static void eliminate(ir_node *irn, void *ctx)
1069 pre_env *env = (pre_env*)ctx;
1071 if (! is_Block(irn)) {
1072 ir_node *block = get_nodes_block(irn);
1073 block_info *bl = get_block_info(block);
1074 ir_node *value = identify(irn);
1076 if (value != NULL) {
1077 ir_node *expr = (ir_node*)ir_valueset_lookup(bl->avail_out, value);
1079 if (expr != NULL && expr != irn) {
1080 elim_pair *p = OALLOC(env->obst, elim_pair);
1084 p->next = env->pairs;
1085 if (get_irn_idx(expr) >= env->last_idx)
1086 p->reason = FS_OPT_GVN_PARTLY;
1088 p->reason = FS_OPT_GVN_FULLY;
1096 * Do all the recorded changes and optimize
1097 * newly created Phi's.
1099 * @param pairs list of elimination pairs
1101 static void eliminate_nodes(elim_pair *pairs)
1105 for (p = pairs; p != NULL; p = p->next) {
1106 /* might be already changed */
1107 p->new_node = skip_Id(p->new_node);
1109 DB((dbg, LEVEL_1, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1110 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1111 * which we can optimize here */
1112 if (is_Phi(p->new_node)) {
1114 ir_node *res = NULL;
1116 for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1117 ir_node *pred = get_irn_n(p->new_node, i);
1119 if (pred != p->old_node) {
1128 exchange(p->new_node, res);
1132 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1133 exchange(p->old_node, p->new_node);
1135 } /* eliminate_nodes */
1138 * Gvn_Pre pass for graph irg.
1140 * @param irg the graph
1142 void do_gvn_pre(ir_graph *irg)
1144 struct obstack obst;
1146 optimization_state_t state;
1147 block_info *bl_info;
1148 unsigned antic_iter;
1149 unsigned insert_iter;
1151 assure_irg_properties(irg,
1152 IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1153 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1154 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
1155 | IR_GRAPH_PROPERTY_CONSISTENT_POSTDOMINANCE);
1157 /* register a debug mask */
1158 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1159 /* edges will crash if enabled due to our allocate on other obstack trick */
1160 edges_deactivate(irg);
1162 save_optimization_state(&state);
1165 * If there are two nodes with the same value in one block,
1166 * the exp_gen valueset can only contain one of them. */
1167 set_opt_global_cse(0);
1168 new_identities(irg);
1169 irg_walk_graph(irg, NULL, cse_walker, &a_env);
1171 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1173 /* Switch on GCSE. We need it to correctly compute
1174 the value of a node, which is independent from
1176 set_opt_global_cse(1);
1177 new_identities(irg);
1179 /* setup environment */
1180 obstack_init(&obst);
1183 a_env.start_block = get_irg_start_block(irg);
1184 a_env.end_block = get_irg_end_block(irg);
1187 /* allocate block info */
1188 irg_walk_blkwise_graph(irg, block_info_walker, NULL, &a_env);
1190 /* generate exp_gen */
1191 irg_walk_blkwise_dom_top_down(irg, NULL, topo_walker, &a_env);
1192 dump_all_expgen_sets(a_env.list);
1194 /* compute the avail_out sets for all blocks */
1195 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, &a_env);
1197 /* compute the anticipated value sets for all blocks */
1199 a_env.first_iter = 1;
1201 /* antic_in passes */
1203 DB((dbg, LEVEL_1, "= Antic_in Iteration %d ========================\n",
1206 irg_walk_blkwise_graph(irg, compute_antic, NULL, &a_env);
1207 a_env.first_iter = 0;
1208 DB((dbg, LEVEL_1, "----------------------------------------------\n"));
1209 /* TODO bad endless loop protection */
1210 } while (a_env.changes != 0 && antic_iter < 40);
1212 /* compute redundant expressions */
1214 a_env.last_idx = get_irg_last_idx(irg);
1217 DB((dbg, LEVEL_1, "= Insert Iteration %d ==========================\n", insert_iter));
1219 /* TODO topologically top down would be better; fewer iterations. */
1220 dom_tree_walk_irg(irg, insert_nodes, NULL, &a_env);
1221 DB((dbg, LEVEL_1, "----------------------------------------------\n"));
1222 } while (a_env.changes != 0);
1224 /* last step: eliminate nodes */
1225 irg_walk_graph(irg, NULL, eliminate, &a_env);
1226 eliminate_nodes(a_env.pairs);
1228 /* clean up: delete all sets */
1229 for (bl_info = a_env.list; bl_info != NULL; bl_info = bl_info->next) {
1230 ir_valueset_del(bl_info->exp_gen);
1231 ir_valueset_del(bl_info->avail_out);
1232 ir_valueset_del(bl_info->antic_in);
1233 ir_nodehashmap_destroy(bl_info->trans);
1234 free(bl_info->trans);
1235 if (bl_info->new_set)
1236 ir_valueset_del(bl_info->new_set);
1239 obstack_free(&obst, NULL);
1241 /* pin the graph again.
1242 This is needed due to the use of set_opt_global_cse(1) */
1243 set_irg_pinned(irg, op_pin_state_pinned);
1244 restore_optimization_state(&state);
1246 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
1249 /* Creates an ir_graph pass for do_gvn_pre. */
1250 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
1252 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);