2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
46 #include "irgraph_t.h"
51 #define MAX_ANTIC_ITER 10
52 #define MAX_INSERT_ITER 3
56 #define BETTER_GREED 0
59 #define NO_INF_LOOPS 0
61 /** Additional info we need for every block. */
62 typedef struct block_info {
63 ir_valueset_t *exp_gen; /* this block's clean expressions */
64 ir_valueset_t *avail_out; /* available values at block end */
65 ir_valueset_t *antic_in; /* clean anticipated values at block entry */
66 ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase*/
67 ir_valueset_t *new_set; /* values newly made available by hoisting */
68 ir_nodehashmap_t *trans; /* nodes translated into this block */
69 ir_node *avail; /* saves available node for insert node phase */
70 int found; /* saves kind of availability for insert_node phase */
71 ir_node *block; /* block of the block_info */
72 struct block_info *next; /* links all instances for easy access */
/**
76 * A pair of nodes to be exchanged.
77 * We have to defer the exchange because our hash-sets cannot
78 * find an already replaced node.
*/
80 typedef struct elim_pair {
81 ir_node *old_node; /* node that will be replaced */
82 ir_node *new_node; /* replacement for old_node */
83 struct elim_pair *next; /* links all instances for easy access */
84 int reason; /* reason for the replacement */
87 /** environment for the GVN-PRE algorithm */
88 typedef struct pre_env {
89 ir_graph *graph; /* current graph */
90 struct obstack *obst; /* obstack to allocate on */
91 ir_node *start_block; /* start block of the current graph */
92 ir_node *end_block; /* end block of the current graph */
93 ir_node *end_node; /* end node of the current graph */
94 block_info *list; /* block_info list head */
95 elim_pair *pairs; /* elim_pair list head */
96 ir_nodeset_t *keeps; /* phis to be removed; used to kill their keep-alive edges */
97 unsigned last_idx; /* last node index of input graph */
98 char changes; /* flag for fixed point iterations - non-zero if changes occurred */
99 char first_iter; /* non-zero for first fixed point iteration */
/* global environment of the currently running pass */
102 static pre_env *environ;
103 /* custom GVN value map: maps a node to the node representing its value */
104 static ir_nodehashmap_t value_map;
106 /* debug module handle */
107 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
111 /* --------------------------------------------------------
113 * --------------------------------------------------------
/** Counters collected while the pass runs (debug builds). */
116 typedef struct gvnpre_statistics {
123 int first_iter_found;
124 int antic_iterations;
125 int insert_iterations;
129 gvnpre_statistics *gvnpre_stats = NULL;
/** Allocates and zeroes the statistics record. */
131 static void init_stats()
133 gvnpre_stats = XMALLOCZ(gvnpre_statistics);
/** Releases the statistics record. (Body elided in this excerpt.) */
136 static void free_stats()
/** Dumps all collected counters through the debug module at LEVEL_1. */
142 static void print_stats()
144 gvnpre_statistics *stats = gvnpre_stats;
145 DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
146 DB((dbg, LEVEL_1, "antic_in iterations : %d\n", stats->antic_iterations));
147 DB((dbg, LEVEL_1, "insert iterations : %d\n", stats->insert_iterations));
148 DB((dbg, LEVEL_1, "infinite loops : %d\n", stats->infinite_loops));
149 DB((dbg, LEVEL_1, "fully redundant : %d\n", stats->fully));
150 DB((dbg, LEVEL_1, "partially redundant : %d\n", stats->partially));
151 DB((dbg, LEVEL_1, " loads : %d\n", stats->loads));
152 DB((dbg, LEVEL_1, " Divs/Mods : %d\n", stats->divmods));
153 DB((dbg, LEVEL_1, " hoist high : %d\n", stats->hoist_high));
154 DB((dbg, LEVEL_1, " first iteration : %d\n", stats->first_iter_found));
/* tiny helpers so stat updates stay terse at the call sites */
157 #define set_stats(var, value) (var)=(value)
158 #define inc_stats(var) ((var)+=1)
160 /* --------------------------------------------------------
162 * --------------------------------------------------------
/**
* Dumps a value set in the form "txt(block) = { expr(value), ... }".
*
168 * @param set the set to dump
169 * @param txt a text to describe the set
170 * @param block the owner block of the set
*/
172 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
174 ir_valueset_iterator_t iter;
175 ir_node *value, *expr;
178 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
180 foreach_valueset(set, value, expr, iter) {
182 DB((dbg, LEVEL_2, "\n"));
184 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
186 DB((dbg, LEVEL_2, " %+F,", expr));
189 DB((dbg, LEVEL_2, "\n}\n"));
190 } /* dump_value_set */
/**
193 * Dump all exp_gen value sets.
*
195 * @param list the list of block infos to retrieve the sets from
*/
197 static void dump_all_expgen_sets(block_info *list)
199 block_info *block_info;
201 for (block_info = list; block_info != NULL; block_info = block_info->next) {
202 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
/* no-op stubs when debugging support is compiled out (see #endif below) */
208 #define dump_all_expgen_sets(list)
209 #define dump_value_set(set, txt, block)
211 #endif /* DEBUG_libfirm */
213 /* --------------------------------------------------------
215 * --------------------------------------------------------
/**
219 * Compares node collisions in valuetable.
220 * Modified identities_cmp().
*
* @return 0 when elt and key represent the same GVN value, non-zero otherwise
*/
222 static int compare_gvn_identities(const void *elt, const void *key)
224 ir_node *a = (ir_node *)elt;
225 ir_node *b = (ir_node *)key;
228 if (a == b) return 0;
230 /* phi nodes kill predecessor values and are always different */
231 if (is_Phi(a) || is_Phi(b))
235 assert(! is_Call(a) || is_memop(a));
236 assert(! is_Load(a) || is_memop(a));
238 assert(! is_Call(b) || is_memop(b));
239 assert(! is_Load(b) || is_memop(b));
241 /* memops are not the same, even if we want to optimize them
242 we have to take the order into account */
243 if (is_memop(a) || is_memop(b))
247 if (is_Call(a) || is_Call(b))
251 if ((get_irn_op(a) != get_irn_op(b)) ||
252 (get_irn_mode(a) != get_irn_mode(b))) return 1;
254 /* compare if a's in and b's in are of equal length */
255 irn_arity_a = get_irn_arity(a);
256 if (irn_arity_a != get_irn_arity(b))
259 /* blocks are never the same */
260 if (is_Block(a) || is_Block(b))
263 /* TODO depends on load optimization */
264 if (get_irn_pinned(a) == op_pin_state_pinned) {
265 /* for pinned nodes, the block inputs must be equal */
266 if (get_irn_n(a, -1) != get_irn_n(b, -1))
269 /* we need global values independently from their blocks */
270 assert(get_opt_global_cse());
273 /* compare a->in[0..ins] with b->in[0..ins] */
274 for (i = 0; i < irn_arity_a; ++i) {
275 ir_node *pred_a = get_irn_n(a, i);
276 ir_node *pred_b = get_irn_n(b, i);
277 if (pred_a != pred_b) {
278 if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
/*
284 * here, we already know that the nodes are identical except their
* attributes (elided); delegate the attribute comparison to the op.
*/
287 if (a->op->ops.node_cmp_attr)
288 return a->op->ops.node_cmp_attr(a, b);
/**
294 * Identify does a lookup in the GVN valuetable.
295 * To be used when no new GVN values are to be created.
*
297 * @param irn a node representing an expression
298 * @return a node representing the value
*/
300 static ir_node *identify(ir_node *irn)
302 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
305 /* irn represents a new value, so return the leader */
306 return identify_remember(irn);
/**
310 * remember() adds node irn to the GVN valuetable.
311 * Identify_remember only identifies values of nodes with the
312 * same predecessor nodes (not values). By creating a node from the predecessor
313 * values/leaders, a true valuetree is built. Phis kill their predecessor value,
314 * so no circular dependencies need to be resolved.
*
317 * Maybe this could be implemented with a custom node hash that takes
318 * phi nodes and true values (instead of predecessors) into account,
319 * resulting in value numbers.
320 * TODO This unnecessarily also handles nodes like calls, which are never equal.
*
322 * @param irn a node representing an expression
323 * @return the value of the expression
*/
325 static ir_node *remember(ir_node *irn)
327 int arity = get_irn_arity(irn);
330 ir_node **in = XMALLOCN(ir_node *, arity);
333 for (i = 0; i < arity; ++i) {
334 ir_node *pred = get_irn_n(irn, i);
335 /* value and leader at the same time */
336 ir_node *pred_value = identify(pred);
338 /* phi will be translated anyway, so kill the predecessor values
339 this also prevents circular dependencies */
341 /* every phi represents its own value */
346 /* predecessor is not its value representation/the leader */
347 if (pred != pred_value)
353 /* create a representative whose inputs are the value leaders */
354 ir_node *nn = new_ir_node(
355 get_irn_dbg_info(irn),
357 get_nodes_block(irn),
362 copy_node_attr(environ->graph, irn, nn);
364 /* now the value can be determined because the
365 predecessors are the leaders */
366 value = identify_remember(nn);
368 value = identify_remember(irn);
372 DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
373 ir_nodehashmap_insert(&value_map, irn, value);
378 /** When the value map has been built we may lookup expressions
379 * and remember them if new.
*/
381 static ir_node *identify_or_remember(ir_node *irn)
383 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
387 return remember(irn);
390 /* --------------------------------------------------------
392 * --------------------------------------------------------
/**
396 * Allocate block info for block block.
*
398 * @param block the block
399 * @param env the environment
*/
401 static void alloc_block_info(ir_node *block, pre_env *env)
403 block_info *info = OALLOC(env->obst, block_info);
405 set_irn_link(block, info);
406 info->exp_gen = ir_valueset_new(16);
407 info->avail_out = ir_valueset_new(16);
408 info->antic_in = ir_valueset_new(16);
409 info->antic_done = ir_valueset_new(16);
410 info->trans = XMALLOC(ir_nodehashmap_t);
411 ir_nodehashmap_init(info->trans);
413 info->new_set = NULL;
418 info->next = env->list;
420 } /* alloc_block_info */
/** Frees the sets owned by a block_info (the struct itself lives on the obstack). */
422 static void free_block_info(block_info *block_info)
424 ir_valueset_del(block_info->exp_gen);
425 ir_valueset_del(block_info->avail_out);
426 ir_valueset_del(block_info->antic_in);
/* NOTE(review): antic_done is not freed in the visible lines — confirm it is
released in an elided line of this function. */
427 if (block_info->trans) {
428 ir_nodehashmap_destroy(block_info->trans);
429 free(block_info->trans);
431 if (block_info->new_set)
432 ir_valueset_del(block_info->new_set);
/**
436 * Bottom up walker that ensures that every block gets a block info.
*
438 * @param irn the node
439 * @param ctx the environment
*/
441 static void block_info_walker(ir_node *irn, void *ctx)
444 pre_env *env = (pre_env*)ctx;
445 alloc_block_info(irn, env);
/**
450 * Returns the block info of a block.
*/
452 static block_info *get_block_info(ir_node *block)
454 return (block_info*)get_irn_link(block);
457 /* --------------------------------------------------------
458 * Infinite loop analysis
459 * --------------------------------------------------------
/**
463 * Walker to set block marks and loop links to 0.
*/
465 static void clear_block_mark_loop_link(ir_node *block, void *env)
469 if (is_Block(block)) {
470 set_Block_mark(block, 0);
471 set_loop_link(get_irn_loop(block), NULL);
/**
476 * Returns non-zero if block is part of real loop loop.
*/
479 static unsigned in_loop(ir_node *block, ir_loop *loop)
481 ir_loop *l = get_irn_loop(block);
482 ir_loop *outer = get_irg_loop(environ->graph);
485 /* loop tree root is not a loop */
486 if (l == NULL || l == outer)
488 l = get_loop_outer_loop(l);
/**
494 * Returns the outermost real loop of loop.
*/
496 static ir_loop *get_loop_outermost(ir_loop *loop)
498 ir_loop *outer = get_irg_loop(environ->graph);
500 ir_loop *last = NULL;
504 l = get_loop_outer_loop(l);
/**
510 * Topological bottom-up walker sets links of infinite loops to non-zero.
511 * Block marks are used to flag blocks reachable (from end) on the one hand,
512 * on the other hand they are set if the block is not part of an infinite loop.
*/
514 static void infinite_loop_walker(ir_node *block, void *env)
520 if (! is_Block(block))
523 /* start block has no predecessors */
524 if (block == environ->start_block)
527 arity = get_irn_arity(block);
529 /* block not part of a real loop: no infinite loop */
530 if (get_irn_loop(block) == get_irg_loop(environ->graph))
531 set_Block_mark(block, 1);
533 if (get_Block_mark(block)) {
534 /* reachable block: mark all cf predecessors */
535 for (i = 0; i < arity; ++i) {
536 ir_node *pred = get_Block_cfgpred_block(block, i);
539 set_Block_mark(pred, 1);
542 /* We are in a real loop and see an unreachable block. */
543 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
545 /* flag loop as infinite */
546 set_loop_link(outermost_loop, outermost_loop);
547 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
549 /* The cf predecessors are unreachable, but can never be part of
550 an infinite loop, because we just reached them. So we set the
551 blockmark to prevent triggering the infinite loop detection. */
553 /* passing information to the cf predecessors */
554 for (i = 0; i < arity; ++i) {
555 ir_node *pred = get_Block_cfgpred_block(block, i);
560 /* If our cf predecessor is in the same endless loop,
561 it is also unreachable. */
562 if (in_loop(pred, outermost_loop)) {
563 set_Block_mark(pred, 0);
565 /* When we leave the unreachable loop, we artificially
566 declare the cf predecessor reachable. */
567 set_Block_mark(pred, 1);
/**
574 * Sets loop links of outermost infinite loops to non-zero.
*/
576 static void analyse_loops(ir_graph *irg)
578 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
580 /* reset block mark and loop links */
581 irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
583 /* mark end block reachable */
584 set_Block_mark(get_irg_end_block(irg), 1);
585 irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
587 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
/**
592 * Returns non-zero if block is part of an infinite loop.
*/
594 static unsigned is_in_infinite_loop(ir_node *block)
598 assert(is_Block(block));
599 loop = get_irn_loop(block);
602 loop = get_loop_outermost(loop);
604 return (get_loop_link(loop) != NULL);
610 /* --------------------------------------------------------
612 * --------------------------------------------------------
/**
616 * Helper function to get the anti leader of node in block.
*/
618 static ir_node *get_anti_leader(ir_node *node, ir_node *block)
620 block_info *info = get_block_info(block);
621 ir_node *value = identify(node);
622 ir_node *leader = ir_valueset_lookup(info->antic_in, value);
627 /* this applies in the context of this algorithm */
/**
632 * Returns non-zero if a node is movable and a possible candidate for PRE.
*/
634 static unsigned is_nice_value(ir_node *n)
636 ir_mode *mode = get_irn_mode(n);
642 if (is_Proj(n) && mode != mode_X && mode != mode_T)
/* volatile Loads must never be moved */
651 return (get_Load_volatility(n) == volatility_non_volatile);
654 if (get_irn_pinned(n) == op_pin_state_pinned)
657 if (! mode_is_data(mode)) {
658 if (! is_Div(n) && ! is_Mod(n))
/**
665 * Checks if a node n is clean in block block for exp_gen.
*
668 * @param block the block
669 * @return non-zero value for clean node
*/
671 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
678 if (! is_nice_value(n))
681 arity = get_irn_arity(n);
682 for (i = 0; i < arity; ++i) {
683 ir_node *pred = get_irn_n(n, i);
689 /* we only handle current block */
690 if (get_nodes_block(pred) != block)
693 if (! is_nice_value(pred))
696 value = identify(pred);
697 if (! ir_valueset_lookup(valueset, value))
/**
705 * Topological walker puts nodes in top-down topological order into exp_gen set.
706 * Assumed to walk blockwise and nodewise topologically top-down.
*
708 * @param irn the node
709 * @param ctx the environment
*/
711 static void topo_walker(ir_node *irn, void *ctx)
721 if (! is_nice_value(irn))
724 /* GVN step: remember the value. */
725 value = remember(irn);
727 if (is_irn_constlike(irn))
730 block = get_nodes_block(irn);
731 info = get_block_info(block);
733 ir_valueset_insert(info->avail_out, value, irn);
735 if (is_clean_in_block(irn, block, info->exp_gen)) {
736 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
738 ir_valueset_insert(info->exp_gen, value, irn);
740 /* flag irn as clean */
741 set_irn_link(irn, irn);
743 /* flag irn as not clean */
744 set_irn_link(irn, NULL);
749 /* --------------------------------------------------------
751 * --------------------------------------------------------
/**
755 * Gets the result of node's phi translation into block.
*
757 * @param node the node
758 * @param block the target block
*
760 * @return a phi translation of node node into block block or NULL
*/
762 static ir_node *get_translated(ir_node *block, ir_node *node)
765 if (is_irn_constlike(node))
768 trans = ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
/** Like get_translated(), but looks up node in the predecessor at pos of block. */
775 static ir_node *get_translated_pred(ir_node *block, int pos, ir_node *node)
778 if (is_irn_constlike(node))
781 pred_block = get_Block_cfgpred_block(block, pos);
782 return ir_nodehashmap_get(ir_node, get_block_info(pred_block)->trans, node);
/**
786 * Saves result of phi translation of node into predecessor
787 * at pos of block succ.
*
789 * @param node the node
790 * @param succ the successor of the translation target block
791 * @param pos the position of the predecessor block
792 * @param trans the translation result
*/
795 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
797 if (is_irn_constlike(node))
799 /* insert or replace */
800 ir_nodehashmap_insert(map, node, trans)
/**
805 * Checks if node is hoistable into block.
*
807 * A clean node in block block can be hoisted above block succ.
808 * A node is not clean if its representative is killed in block block.
809 * The node can still be hoisted into block block.
*
811 * @param n the node to be checked
812 * @param block the block to be hoisted into
813 * @returns block which node can be hoisted above
*/
815 static unsigned is_hoistable_above(ir_node *node, ir_node *block, unsigned translated)
818 int arity = get_irn_arity(node);
820 /* make sure that node can be hoisted above block */
822 if (is_irn_constlike(node))
825 for (i = 0; i < arity; ++i) {
826 block_info *info = get_block_info(block);
827 ir_node *pred = get_irn_n(node, i);
828 ir_node *pred_value = identify(pred);
829 ir_node *pred_block = get_nodes_block(pred);
831 /* predecessor strictly dominating */
832 if (block_strictly_dominates(pred_block, block))
835 /* if we didn't translate the exact representative we cannot translate */
836 if (translated && !get_translated(pred_block, pred))
839 if (! ir_valueset_lookup(info->antic_in, pred_value))
847 /* Helper function to compare the values of pred and avail_pred. */
848 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
850 ir_node *avail_value = identify(avail_pred);
851 ir_node *trans_pred = get_translated_pred(block, pos, pred);
854 if (trans_pred == NULL)
856 value = identify(trans_pred);
858 DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));
860 return (value == avail_value);
/**
866 * Does phi translation for redundant load nodes only.
867 * Returns NULL for non-redundant loads, which need to be phi translated.
868 * Loads are compared by comparing their pointer values,
869 * and ensuring that they are adjacent.
870 * This is equivalent to what phi_translation does.
*/
872 static ir_node *phi_translate_load(ir_node *load, ir_node *block, int pos)
874 ir_node *mem = get_Load_mem(load);
875 ir_node *trans = get_translated_pred(block, pos, mem);
880 /* no partial redundancy if this is a mode_M phi */
881 if (is_Proj(trans)) {
882 /* The last memory operation in predecessor block */
883 ir_node *avail_load = get_Proj_pred(trans);
885 /* memop is a load with matching type */
886 if (is_Load(avail_load) &&
887 get_Load_mode(load) == get_Load_mode(avail_load)) {
889 unsigned match = match_pred(get_Load_ptr(load), get_Load_ptr(avail_load), block, pos);
/**
901 * Does phi translation for redundant Div/Mod nodes only.
902 * Returns NULL for non-redundant node, which needs to be phi translated.
*/
904 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
906 ir_node *mem = get_memop_mem(divmod);
907 ir_node *trans = get_translated_pred(block, pos, mem);
912 /* no partial redundancy if this is a mode_M phi */
913 if (is_Proj(trans)) {
914 /* The last memory operation in predecessor block */
915 ir_node *avail_op = get_Proj_pred(trans);
917 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
918 unsigned left, right;
920 if (is_Div(avail_op)) {
/* Divs must agree in result mode and no-remainder flag */
921 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
922 get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
924 left = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
925 right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
930 } else if (is_Mod(avail_op)) {
931 if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
933 left = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
934 right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
/**
947 * Translates an expression above a Phi.
*
949 * @param node the node
950 * @param block the block the node is translated into
951 * @param pos the input number of the destination block
*
953 * @return a node representing the translated value
*/
955 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_node *pred_block)
960 ir_node *target_block;
965 if (get_nodes_block(node) == block)
966 return get_Phi_pred(node, pos);
967 /* this phi does not need translation */
970 arity = get_irn_arity(node);
974 ir_node *avail_load = phi_translate_load(node, block, pos);
981 if (is_Div(node) || is_Mod(node)) {
982 ir_node *avail_op = phi_translate_divmod(node, block, pos);
989 in = XMALLOCN(ir_node *, arity);
991 /* A value has several representatives. The anti leader is chosen to be
992 the main representative. If we access a node as representative of a
993 value we always use the anti leader. The anti leader can be found by
994 antic_in(identify(node)). */
995 for (i = 0; i < arity; ++i) {
996 /* get anti leader for pred to lookup its translated value */
997 ir_node *pred = get_irn_n(node, i);
998 ir_node *anti_leader = get_anti_leader(pred, block);
999 /* we cannot find this value in antic_in, because the value
1000 has (possibly) changed! */
1001 ir_node *pred_trans = get_translated(pred_block, anti_leader);
1004 if (pred_trans == NULL) {
1008 /* predecessor value changed, so translation is needed */
1009 if (identify(expr) != identify(pred))
1013 DB((dbg, LEVEL_3, "in %+F\n", expr));
1020 DB((dbg, LEVEL_3, "Translate\n"));
1022 target_block = get_Block_cfgpred_block(block, pos);
1024 target_block = get_nodes_block(in[0]);
1026 /* copy node to represent the new value.
1027 We do not translate nodes that do not need translation,
1028 so we use the newly created nodes as value representatives only.
1029 Their block is not important, because we create new ones during
1030 insert node phase. */
1032 get_irn_dbg_info(node),
1040 /* We need the attribute copy here, because the Hash value of a
1041 node might depend on it. */
1042 copy_node_attr(environ->graph, node, nn);
1043 /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
1044 because it already uses availability. */
1046 DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
/**
1051 * Block-walker, computes Antic_in(block).
1052 * Builds a value tree out of the graph by translating values
*
1055 * @param block the block
1056 * @param ctx the walker environment
*/
1058 static void compute_antic(ir_node *block, void *ctx)
1060 pre_env *env = (pre_env*)ctx;
1061 block_info *succ_info;
1067 ir_valueset_iterator_t iter;
1070 /* filter blocks from topological walker */
1071 if (! is_Block(block))
1074 /* the end block has no successor */
1075 if (block == env->end_block)
1078 info = get_block_info(block);
/* remember the old size to detect changes for the fixed point */
1080 size = ir_valueset_size(info->antic_in);
1081 n_succ = get_Block_n_cfg_outs(block);
1084 if (env->first_iter) {
1086 /* keep antic_in of infinite loops empty */
1087 if (! is_in_infinite_loop(block)) {
1088 foreach_valueset(info->exp_gen, value, expr, iter) {
1089 ir_valueset_insert(info->antic_in, value, expr);
1093 foreach_valueset(info->exp_gen, value, expr, iter) {
1094 ir_valueset_insert(info->antic_in, value, expr);
1099 // do exp_gen first, because we need to know which values to kill off with tmpgen.
1101 /* successor might have phi nodes */
1102 if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1103 succ = get_Block_cfg_out(block, 0);
1104 int pos = get_Block_cfgpred_pos(succ, block);
1105 succ_info = get_block_info(succ);
1107 /* initialize translated set */
1108 if (env->first_iter) {
1109 info->trans = XMALLOC(ir_nodehashmap_t);
1110 ir_nodehashmap_init(info->trans);
1113 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1114 ir_node *trans = phi_translate(expr, succ, pos, block);
1115 /* create new value if necessary */
1116 ir_node *trans_value = identify_or_remember(trans);
1119 DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1121 /* on value change (phi present) we need the translated node
1122 to represent the new value for possible further translation. */
1123 if (value != trans_value)
1128 if (is_clean_in_block(expr, block, info->antic_in)) {
1129 ir_valueset_replace(info->antic_in, trans_value, represent);
1131 set_translated(info->trans, expr, represent);
1133 if (is_hoistable_above(expr, block, 1))
1134 ir_valueset_insert(info->antic_in, trans_value, represent);
1135 set_translated(info->trans, expr, represent);
1139 } else if (n_succ > 1) {
1141 ir_node *common = NULL;
1142 ir_node *succ0 = get_Block_cfg_out(block, 0);
1143 block_info *succ0_info = get_block_info(succ0);
1145 /* intersection of the successors' antic_in sets */
1146 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1147 /* iterate over remaining successors */
1148 for (i = 1; i < n_succ; ++i) {
1149 ir_node *succ = get_Block_cfg_out(block, i);
1150 block_info *succ_info = get_block_info(succ);
1152 /* value in antic_in? */
1153 common = ir_valueset_lookup(succ_info->antic_in, value);
1158 //if (common && is_hoistable_above(expr, block, 0) && is_clean_in_block(expr, block, info->antic_in))
1159 if (common && is_clean_in_block(expr, block, info->antic_in))
1160 //ir_valueset_insert(info->antic_in, value, expr);
1161 ir_valueset_replace(info->antic_in, value, expr);
1163 if (common && is_hoistable_above(expr, block, 0)) {
1164 ir_valueset_insert(info->antic_in, value, expr);
1165 DB((dbg, LEVEL_3, "common and clean %+F(%+F) in %+F\n", expr, value, block));
1172 DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
/* a size change means the fixed point has not been reached yet */
1174 if (size != ir_valueset_size(info->antic_in))
1178 /* --------------------------------------------------------
1179 * Main algorithm Avail_out
1180 * --------------------------------------------------------
/**
1184 * Computes Avail_out(block):
*
1186 * Avail_in(block) = Avail_out(dom(block))
1187 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
*
1190 * This function must be called in the top-down topological order:
1191 * Then it computes Leader(Nodes(block)) instead of Nodes(block) !
*
1193 * @param block the block
1194 * @param ctx walker context
*/
1196 static void compute_avail_top_down(ir_node *block, void *ctx)
1198 pre_env *env = (pre_env*)ctx;
/* the end block carries no avail_out of interest */
1201 if (block == env->end_block)
1204 info = get_block_info(block);
1206 /* Add all nodes from the immediate dominator.
1207 This ensures that avail_out contains the leader. */
1208 if (block != env->start_block) {
1209 ir_node *dom_block = get_Block_idom(block);
1210 block_info *dom_info = get_block_info(dom_block);
1213 ir_valueset_iterator_t iter;
1215 foreach_valueset(dom_info->avail_out, value, expr, iter)
1216 /* replace: use the leader from dominator, not local exp_gen */
1217 ir_valueset_replace(info->avail_out, value, expr);
1220 DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1223 /* --------------------------------------------------------
1224 * Main algorithm redundancy detection
1225 * --------------------------------------------------------
1230 * Flags node irn redundant depending on redundant parameter.
1232 static void flag_redundant(ir_node *irn, unsigned redundant)
1235 set_irn_link(irn, irn);
1237 set_irn_link(irn, NULL);
1244 * Returns redundant flag of node irn.
1246 static unsigned is_redundant(ir_node *irn)
1248 return (get_irn_link(irn) != NULL);
1253 * Returns a valid mode if the value of expr is a partially redundant value.
1255 * @param block the block
1256 * @param expr the expression
1258 * @return mode of the expression if it is partially redundant else NULL
1260 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1262 ir_node *first_avail = NULL;
1264 int arity = get_irn_arity(block);
1265 int fully_redundant = 1;
1266 int partially_redundant = 0;
1267 ir_mode *mode = NULL;
1269 DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1271 /* for each predecessor blocks */
1272 for (pos = 0; pos < arity; ++pos) {
1273 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1274 block_info *pred_info;
1275 ir_node *trans_expr;
1276 ir_node *trans_value;
1277 ir_node *avail_expr;
1279 /* ignore bad blocks. */
1280 if (is_Bad(pred_block))
1283 pred_info = get_block_info(pred_block);
1284 trans_expr = get_translated(pred_block, expr);
1285 trans_value = identify(trans_expr);
1287 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1288 /* value might be available through a not yet existing constant */
1289 if (avail_expr == NULL && is_Const(trans_expr)) {
1290 /* limit range of new constants */
1291 ir_mode *cmode = get_irn_mode(trans_expr);
1292 ir_tarval *upper = new_tarval_from_long(127, cmode);
1293 ir_tarval *lower = new_tarval_from_long(-127, cmode);
1294 ir_tarval *c = get_Const_tarval(trans_expr);
1296 if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
1297 tarval_cmp(c, upper) != ir_relation_greater_equal) {
1300 avail_expr = trans_expr;
1304 DB((dbg, LEVEL_3, "avail_expr %+F trans_expr %+F\n", avail_expr, trans_expr));
1306 if (avail_expr == NULL) {
1307 pred_info->avail = trans_expr;
1308 pred_info->found = 0;
1309 fully_redundant = 0;
1311 /* expr is available */
1312 pred_info->avail = avail_expr;
1313 pred_info->found = 1;
1314 mode = get_irn_mode(avail_expr);
1315 partially_redundant = 1;
1317 if (first_avail == NULL)
1318 first_avail = avail_expr;
1319 else if (first_avail != avail_expr)
1320 /* Multiple different expressions are available */
1321 fully_redundant = 0;
1323 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1328 /* value is redundant from last iteration,
1329 but has not been removed from antic_in (is not optimized) */
1330 if (! environ->first_iter && is_redundant(expr))
1334 /* If it is not the same value already existing along every predecessor
1335 and it is defined by some predecessor then it is partially redundant. */
1336 if (! partially_redundant || fully_redundant)
1342 * Updates the new_set of a block by adding the new_set of
1343 * the immediate dominating block.
/* NOTE(review): declarations of value/expr/updated and some braces lie on
   lines elided from this excerpt (original numbering jumps 1347->1351 etc.). */
1347 static void update_new_set(ir_node *block, ir_node *idom)
1351 ir_valueset_iterator_t iter;
1352 block_info *curr_info = get_block_info(block);
1353 block_info *idom_info = get_block_info(idom);
1356 //DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1357 foreach_valueset(idom_info->new_set, value, expr, iter) {
1358 /* inherit new_set from immediate dominator */
1359 ir_valueset_insert(curr_info->new_set, value, expr);
1360 /* replace in avail_out: the dominator's hoisted node becomes the leader here */
1361 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1363 #ifdef DEBUG_libfirm
1365 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1367 } /* update_new_set */
1370 * Checks if hoisting irn is greedy.
1371 * Greedy hoisting means that there are non partially redundant nodes
1372 * hoisted. This happens if a partially redundant node has
1373 * non redundant predecessors.
1375 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1377 int block_arity = get_irn_arity(block);
1378 int arity = get_irn_arity(irn);
1381 /* As long as the predecessor values are available in all predecessor blocks,
1382 we can hoist this value. */
1383 for (pos = 0; pos < block_arity; ++pos) {
1384 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1385 block_info *pred_info = get_block_info(pred_block);
1387 for (i = 0; i < arity; ++i) {
1388 ir_node *pred = get_irn_n(irn, i);
1389 ir_node *value = identify(pred);
1390 //ir_node *anti_leader = get_anti_leader(pred, block);
/* constants can always be rematerialized in the predecessor; skip them
   (the continue on the elided line presumably follows) */
1392 if (is_irn_constlike(value))
1395 /* predecessor is on current path we have to ensure redundancy */
1396 //if (block_dominates(block, get_nodes_block(pred)) && ! is_redundant(anti_leader))
/* operand's value not available at the end of this predecessor block ->
   hoisting irn would also hoist this non-redundant operand: greedy */
1397 if (ir_valueset_lookup(pred_info->avail_out, value) == NULL)
1405 * Perform insertion of partially redundant values.
1406 * For every block node, do the following:
1407 * 1. Propagate the NEW_SETS of the dominator into the current block.
1408 * If the block has multiple predecessors,
1409 * 2a. Iterate over the ANTIC expressions for the block to see if
1410 * any of them are partially redundant.
1411 * 2b. If so, insert them into the necessary predecessors to make
1412 * the expression fully redundant.
1413 * 2c. Insert a new Phi merging the values of the predecessors.
1414 * 2d. Insert the new Phi, and the new expressions, into the
1417 * @param block the block
1418 * @param ctx the walker environment
/* NOTE(review): the original line numbering jumps repeatedly inside this
   function; else-branches, continues and closing braces are elided here. */
1420 static void insert_nodes_walker(ir_node *block, void *ctx)
1422 pre_env *env = (pre_env*)ctx;
1423 int arity = get_irn_arity(block);
1429 ir_valueset_iterator_t iter;
1435 if (! is_Block(block))
1438 /* ensure that even the start block has a new_set */
1439 info = get_block_info(block);
1441 ir_valueset_del(info->new_set);
1442 info->new_set = ir_valueset_new(16);
1444 if (block == env->start_block)
1447 DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1449 idom = get_Block_idom(block);
1450 update_new_set(block, idom);
1452 /* process only path joining blocks */
/* remember antic_in members so they can be revisited in inverse
   topological order after the main insertion loop below */
1458 stack = plist_new();
1459 foreach_valueset(info->antic_in, value, expr, iter) {
1460 /* inverse topologic */
1461 plist_insert_front(stack, expr);
1465 /* This is the main reason antic_in is preferred over antic_out;
1466 we may iterate over every anticipated value first and not
1467 over the predecessor blocks. */
1468 foreach_valueset(info->antic_in, value, expr, iter) {
/* already handled in a previous insert iteration */
1474 if (ir_valueset_lookup(info->antic_done, value))
1477 /* filter phi nodes from antic_in */
1480 flag_redundant(expr, 1);
1485 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1487 /* A value computed in the dominator is totally redundant.
1488 Hence we have nothing to insert. */
1489 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1490 DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1491 DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1494 flag_redundant(expr, 1);
/* refuse to hoist when non-redundant operands would be dragged along */
1500 if (is_hoisting_greedy(expr, block)) {
1501 DB((dbg, LEVEL_2, "Greedy\n"));
1503 flag_redundant(expr, 0);
1509 mode = is_partially_redundant(block, expr, value);
1512 flag_redundant(expr, 0);
1517 flag_redundant(expr, 1);
1522 if (is_hoisting_greedy(expr, block)) {
1523 DB((dbg, LEVEL_2, "Better greed: greedy\n"));
1528 #if LOADS || DIVMODS
1529 /* save old mode_M phis to remove keepalive edges later */
1530 if (is_memop(expr)) {
1531 ir_node *mem = get_memop_mem(expr);
1532 if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1533 ir_nodeset_insert(env->keeps, mem);
1538 #ifdef DEBUG_libfirm
1539 if (! is_Proj(expr)) {
1540 if (env->first_iter)
1541 inc_stats(gvnpre_stats->first_iter_found);
1542 inc_stats(gvnpre_stats->partially);
1545 inc_stats(gvnpre_stats->loads);
1546 else if (is_Div(expr) || is_Mod(expr))
1547 inc_stats(gvnpre_stats->divmods);
/* build the operand array for the merging Phi, one entry per cfg pred */
1550 phi_in = XMALLOCN(ir_node *, arity);
1552 /* for each predecessor block */
1553 for (pos = 0; pos < arity; ++pos) {
1554 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1555 block_info *pred_info;
1557 /* ignore bad blocks. */
1558 if (is_Bad(pred_block)) {
1559 ir_graph *irg = get_irn_irg(pred_block);
1560 phi_in[pos] = new_r_Bad(irg, mode);
1563 pred_info = get_block_info(pred_block);
/* value not available along this predecessor: materialize the
   phi-translated expression there to make expr fully redundant */
1565 if (! pred_info->found) {
1566 ir_node *trans = get_translated(pred_block, expr);
1569 if (! block_dominates(get_nodes_block(trans), pred_block)) {
1570 int trans_arity = get_irn_arity(trans);
1572 ir_node **in = XMALLOCN(ir_node *, trans_arity);
1574 /* non phi descendants can also be redundant, but have
1575 not yet been (phi) translated. */
1576 for (i = 0; i < trans_arity; ++i) {
1577 ir_node *pred = get_irn_n(trans, i);
1578 ir_node *avail = ir_valueset_lookup(pred_info->avail_out, identify(pred));
1581 /* phi translate uses anti leader, we need the leader */
1585 get_irn_dbg_info(trans),
1589 get_irn_mode(trans),
1594 identify_or_remember(nn);
1595 set_translated(pred_info->trans, expr, nn);
1596 DB((dbg, LEVEL_3, "Translation during insert: trans %+F\n", nn));
1601 DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F(%+F) not available\n", trans, pred_block, expr, value));
1603 /* value is now available in target block through trans
1604 insert (not replace) because it has not been available */
1605 ir_valueset_insert(pred_info->avail_out, value, trans);
1608 DEBUG_ONLY(dump_value_set(pred_info->antic_in, "Antic_in", block);)
1610 phi_in[pos] = trans;
1612 /* value available */
1613 phi_in[pos] = pred_info->avail;
1615 DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1618 /* We do not connect tuples as they will be connected automatically
1619 by the corresponding projections. */
1620 if (get_irn_mode(expr) != mode_T) {
1622 phi = new_r_Phi(block, arity, phi_in, mode);
1623 DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1625 /* this value is now available through the new phi */
1626 ir_valueset_replace(info->avail_out, value, phi);
1627 ir_valueset_insert(info->new_set, value, phi);
1632 /* remove from antic_in, because expr is not anticipated
1633 anymore in this block */
1634 ir_valueset_remove_iterator(info->antic_in, &iter);
1636 ir_valueset_insert(info->antic_done, value, expr);
/* second pass: propagate redundancy flags backwards through the
   remembered nodes (inverse topological order from the stack above) */
1643 plist_element_t *it;
1644 /* iterate in inverse topological order */
1645 foreach_plist(stack, it) {
1646 ir_node *irn = (ir_node *)plist_element_get_value(it);
1647 ir_node *block = get_nodes_block(irn);
1651 /* does irn only have redundant successors? */
1653 if (! get_irn_outs_computed(irn))
1656 for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
1657 ir_node *succ = get_irn_out(irn, j);
1659 /* if succ and irn are in the same block */
1660 if (get_nodes_block(succ) == block && is_redundant(succ)) {
1669 flag_redundant(irn, 1);
/**
 * Block walker wrapper around update_new_set(): propagates the new_set of
 * the immediate dominator into every block except the start block.
 * Run after hoist_high so late hoists reach the avail_out sets too.
 */
1678 static void update_new_set_walker(ir_node *block, void *ctx)
1680 pre_env *env = (pre_env*)ctx;
1682 if (! is_Block(block))
1684 if (block == env->start_block)
1687 update_new_set(block, get_Block_idom(block));
1691 * Dom tree block walker to insert nodes into the highest
1692 * possible block where their operands are still available and the value
1692b * remains anticipated (original sentence truncated — TODO confirm intent).
/* NOTE(review): many lines are elided in this excerpt (original numbering
   jumps); loop exits, else-branches and closing braces are not visible. */
1695 static void hoist_high(ir_node *block, void *ctx)
1697 pre_env *env = (pre_env*)ctx;
1698 block_info *curr_info;
1699 ir_valueset_iterator_t iter;
1702 int arity = get_irn_arity(block);
1704 if (! is_Block(block))
1707 curr_info = get_block_info(block);
1709 if (curr_info->new_set)
1710 ir_valueset_del(curr_info->new_set);
1711 curr_info->new_set = ir_valueset_new(16);
1713 if (block == env->start_block)
1719 DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1721 /* foreach entry optimized by insert node phase */
1722 foreach_valueset(curr_info->antic_done, value, expr, iter) {
/* memory operations and Projs are pinned to their memory chain / tuple;
   do not move them */
1725 if (is_memop(expr) || is_Proj(expr))
1728 DB((dbg, LEVEL_2, "leader %+F value %+F\n", expr, value));
1730 /* visit hoisted expressions */
1731 for (pos = 0; pos < arity; ++pos) {
1732 /* standard target is predecessor block */
1733 ir_node *target = get_Block_cfgpred_block(block, pos);
1734 block_info *pred_info = get_block_info(target);
1736 ir_node *new_target;
1737 ir_node *trans_expr;
1738 ir_node *trans_value;
1742 unsigned nest_depth;
1743 block_info *dom_info;
1745 /* get phi translated value */
1746 trans_expr = get_translated(target, expr);
1747 trans_value = identify(trans_expr);
1748 avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1750 /* get the used expr on this path */
1752 /* TODO when does this happen? */
1756 avail_arity = get_irn_arity(avail);
1758 value = identify(avail);
1760 /* anticipation border */
1762 nest_depth = get_loop_depth(get_irn_loop(target));
/* walk up the dominator tree looking for the highest legal placement */
1765 while (dom && dom != environ->start_block) {
1767 dom = get_Block_idom(dom);
1768 dom_info = get_block_info(dom);
1769 DB((dbg, LEVEL_2, "testing dom %+F\n", dom));
1771 /* TODO antic_in means hoistable above block,
1772 but we need 'hoistable into block'. */
1774 /* check if available node is still anticipated and clean */
1775 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1776 DB((dbg, LEVEL_2, "%+F not antic in %+F\n", value, dom));
1780 nest_depth = get_loop_depth(get_irn_loop(dom));
1782 /* do not hoist into loops */
/* NOTE(review): unary '!' binds tighter than '<=', so this evaluates
   (!get_loop_depth(...)) <= nest_depth, i.e. 0-or-1 <= nest_depth —
   almost certainly intended get_loop_depth(...) > nest_depth; verify
   against upstream before fixing (code left untouched in this review). */
1783 if (! get_loop_depth(get_irn_loop(dom)) <= nest_depth) {
1784 DB((dbg, LEVEL_2, "%+F deeper nested\n", dom));
1785 /* not a suitable location */
1790 /************* check if value dies */
1792 /* check for uses on current path*/
1793 for (i = 0; i < avail_arity; i++) {
1794 ir_node *pred = get_irn_n(avail, i);
1795 ir_node *pred_value = identify(pred);
1801 DB((dbg, LEVEL_2, "testing pred %+F\n", pred));
1803 if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1804 DB((dbg, LEVEL_2, "%+F deeper nested\n", dom));
1809 /* TODO this might be a problem as we mainly operate on new nodes */
1810 if (! get_irn_outs_computed(pred)) {
1811 DB((dbg, LEVEL_2, "%+F outs not computed\n", pred));
1816 /* check every successor */
1817 for (j = get_irn_n_outs(pred) - 1; j >= 0; --j) {
1818 ir_node *succ = get_irn_out(pred, j);
1819 DB((dbg, LEVEL_2, "testing succ %+F\n", succ));
1821 /* check only successors on current path to end */
1822 if (block_dominates(dom, get_nodes_block(succ))) {
1823 ir_node *succ_value = identify(succ);
1825 /* Do we have another user than avail?
1826 Then predecessor is not dead after removal of avail. */
1827 if (succ_value != value) {
1828 DB((dbg, LEVEL_4, "fail: still used in %+F\n", succ));
1840 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1841 set_nodes_block(avail, new_target);
1842 ir_valueset_insert(dom_info->new_set, value, avail);
1849 /* No new target or does the available node already dominate the new_target? */
1851 DB((dbg, LEVEL_2, "leader block %+F\n", get_nodes_block(avail)));
1852 if (! block_strictly_dominates(new_target, get_nodes_block(avail))) {
1853 DB((dbg, LEVEL_4, "fail: antic border %+F\n", block));
1861 DB((dbg, LEVEL_2, "antic border %+F\n", new_target));
1866 /* only one usage on our path */
1868 /* push the available node up into */
1869 set_nodes_block(avail, new_target);
1871 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1877 /* --------------------------------------------------------
1878 * Elimination of fully redundant nodes
1879 * --------------------------------------------------------
1883 * Walker which finds redundant nodes using avail_out sets
1884 * and exchanges them for existing ones.
1885 * We cannot change the graph here as this would affect
1886 * the hash values of the nodes.
1887 * Therefore each (old_node, leader) pair is recorded in env->pairs
1887b * and applied later by eliminate_nodes().
1888 * @param irn the node
1889 * @param ctx the walker environment
1891 static void eliminate(ir_node *irn, void *ctx)
1893 pre_env *env = (pre_env*)ctx;
1895 if (! is_Block(irn)) {
1896 ir_node *block = get_nodes_block(irn);
1897 block_info *info = get_block_info(block);
1898 ir_node *value = identify(irn);
1900 if (value != NULL) {
1901 ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
/* a different leader exists for the same value: record the pair */
1903 if (expr != NULL && expr != irn) {
1904 elim_pair *p = OALLOC(env->obst, elim_pair);
1907 if (get_irn_idx(expr) <= env->last_idx)
1913 p->next = env->pairs;
/* leader created after last_idx means it was inserted by this pass,
   i.e. a partial redundancy was made full; otherwise full redundancy */
1914 if (get_irn_idx(expr) > env->last_idx)
1915 p->reason = FS_OPT_GVN_PARTLY;
1917 p->reason = FS_OPT_GVN_FULLY;
1919 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1926 * Do all the recorded changes and optimize
1927 * newly created Phi's.
1929 * @param pairs list of elimination pairs
1929b * @param keeps set of mode_M Phi nodes whose keep-alive edges at End
1929c *              shall be removed after the exchanges
1931 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1934 ir_nodeset_iterator_t iter;
1936 ir_node *end = environ->end_node;
1938 for (p = pairs; p != NULL; p = p->next) {
1939 /* might be already changed */
1940 p->new_node = skip_Id(p->new_node);
1942 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1944 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1945 * which we can optimize here */
1946 if (is_Phi(p->new_node)) {
1948 ir_node *res = NULL;
/* scan the Phi operands for the single non-self predecessor */
1950 for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1951 ir_node *pred = get_irn_n(p->new_node, i);
1953 if (pred != p->old_node) {
1962 exchange(p->new_node, res);
1966 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1968 exchange(p->old_node, p->new_node);
1971 /* remove keep alive edges of unused mode_M phis */
1972 foreach_ir_nodeset(keeps, m_phi, iter) {
1973 remove_End_keepalive(end, m_phi);
1975 } /* eliminate_nodes */
1977 /* --------------------------------------------------------
1979 * --------------------------------------------------------
1983 * Gvn_Pre algorithm.
1985 * @param irg the graph
1985b * @param env pre-initialized pass environment (obstack, block list,
1985c *            start/end nodes, statistics)
1987 static void gvn_pre(ir_graph *irg, pre_env *env)
1989 unsigned antic_iter;
1990 unsigned insert_iter;
1992 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1994 /* allocate block info */
1995 irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1997 ir_nodehashmap_init(&value_map);
1999 /* generate exp_gen */
2000 irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
2001 dump_all_expgen_sets(env->list);
2003 /* compute the avail_out sets for all blocks */
2004 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
2006 /* compute the anticipated value sets for all blocks */
2008 env->first_iter = 1;
2010 /* antic_in passes */
/* fixpoint iteration, bounded by MAX_ANTIC_ITER (see #define above) */
2013 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
2015 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
2016 env->first_iter = 0;
2017 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
2018 } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
2020 DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
2022 ir_nodeset_init(env->keeps);
2024 env->first_iter = 1;
/* nodes with idx > last_idx are known to be created by this pass */
2025 env->last_idx = get_irg_last_idx(irg);
2026 /* compute redundant expressions */
/* fixpoint iteration, bounded by MAX_INSERT_ITER (see #define above) */
2029 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
2031 /* TODO topologically top down would be better; fewer iterations. */
2032 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
2033 env->first_iter = 0;
2034 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
2035 } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
2036 DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
2039 dom_tree_walk_irg(irg, hoist_high, NULL, env);
2040 dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
2045 /* last step: eliminate nodes */
2046 irg_walk_graph(irg, NULL, eliminate, env);
2047 eliminate_nodes(env->pairs, env->keeps);
2049 ir_nodeset_destroy(env->keeps);
2053 * Gvn_Pre pass for graph irg.
2053b * Public entry point: establishes all graph-property preconditions,
2053c * swaps in a GVN-specific value table, runs gvn_pre() and restores
2053d * the optimizer/graph state afterwards.
2055 * @param irg the graph
2057 void do_gvn_pre(ir_graph *irg)
2059 struct obstack obst;
2062 optimization_state_t state;
2063 block_info *block_info;
2065 /* bads and unreachables cause too much trouble with dominance
2067 loop info for endless loop detection
2068 no critical edges is GVN-PRE precondition
2070 assure_irg_properties(irg,
2071 IR_GRAPH_PROPERTY_NO_BADS
2072 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
2073 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
2074 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
2075 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
2076 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
2078 /* register a debug mask */
2079 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
/* saved here, restored near the end — keep the pairing intact */
2081 save_optimization_state(&state);
2084 /* edges will crash if enabled due to our allocate on other obstack trick */
2085 edges_deactivate(irg);
2086 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2088 DEBUG_ONLY(init_stats();)
2090 /* setup environment */
2091 obstack_init(&obst);
2095 env.start_block = get_irg_start_block(irg);
2096 env.end_block = get_irg_end_block(irg);
2097 env.end_node = get_irg_end(irg);
2101 /* Detect and set links of infinite loops to non-zero. */
2104 /* Switch on GCSE. We need it to correctly compute
2105 the value of a node, which is independent from
2107 set_opt_global_cse(1);
2108 /* new_identities */
2109 if (irg->value_table != NULL)
2110 del_pset(irg->value_table);
2111 /* initially assumed nodes in pset are 512 */
2112 irg->value_table = new_pset(compare_gvn_identities, 512);
2114 /* do GVN-PRE pass */
2116 DEBUG_ONLY(print_stats();)
2118 /* clean up: delete all sets */
2119 for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2120 free_block_info(block_info);
2123 DEBUG_ONLY(free_stats();)
2124 ir_nodehashmap_destroy(&value_map);
2125 obstack_free(&obst, NULL);
2126 ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2128 /* Pin the graph again.
2129 This is needed due to the use of set_opt_global_cse(1) */
2130 set_irg_pinned(irg, op_pin_state_pinned);
2131 restore_optimization_state(&state);
2132 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
2134 /* TODO there are optimizations that try to use the existing value_table */
2135 new_identities(irg);
2138 /* Creates an ir_graph pass for do_gvn_pre. */
2139 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2141 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);