2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
46 #include "irgraph_t.h"
51 #define MAX_ANTIC_ITER 10
52 #define MAX_INSERT_ITER 3
56 #define BETTER_GREED 0
59 #define NO_INF_LOOPS 0
60 #define NO_INF_LOOPS2 0
62 /** Additional info we need for every block. */
63 typedef struct block_info {
64 ir_valueset_t *exp_gen; /* contains this blocks clean expressions */
65 ir_valueset_t *avail_out; /* available values at block end */
66 ir_valueset_t *antic_in; /* clean anticipated values at block entry */
67 ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase*/
68 ir_valueset_t *new_set; /* new by hoisting made available values */
69 ir_nodehashmap_t *trans; /* contains translated nodes translated into block */
70 ir_node *avail; /* saves available node for insert node phase */
71 int found; /* saves kind of availability for insert_node phase */
72 ir_node *block; /* block of the block_info */
73 struct block_info *next; /* links all instances for easy access */
77 * A pair of nodes to be exchanged.
78 * We have to defer the exchange because our hash-sets cannot
79 * find an already replaced node.
81 typedef struct elim_pair {
82 ir_node *old_node; /* node that will be replaced */
83 ir_node *new_node; /* replacement for old_node */
84 struct elim_pair *next; /* links all instances for easy access */
85 int reason; /* reason for the replacement */
88 /** environment for the GVN-PRE algorithm */
89 typedef struct pre_env {
90 ir_graph *graph; /* current graph */
91 struct obstack *obst; /* obstack to allocate on */
92 ir_node *start_block; /* start block of the current graph */
93 ir_node *end_block; /* end block of the current graph */
94 ir_node *end_node; /* end node of the current graph */
95 block_info *list; /* block_info list head */
96 elim_pair *pairs; /* elim_pair list head */
97 ir_nodeset_t *keeps; /* a list of to be removed phis to kill their keep alive edges */
98 unsigned last_idx; /* last node index of input graph */
99 char changes; /* flag for fixed point iterations - non-zero if changes occurred */
100 char first_iter; /* non-zero for first fixed point iteration */
103 static pre_env *environ;
104 /* custom GVN value map */
105 static ir_nodehashmap_t value_map;
107 /* debug module handle */
108 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
112 /* --------------------------------------------------------
114 * --------------------------------------------------------
117 typedef struct gvnpre_statistics {
124 int first_iter_found;
125 int antic_iterations;
126 int insert_iterations;
130 gvnpre_statistics *gvnpre_stats = NULL;
132 static void init_stats()
134 gvnpre_stats = XMALLOCZ(gvnpre_statistics);
137 static void free_stats()
143 static void print_stats()
145 gvnpre_statistics *stats = gvnpre_stats;
146 DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
147 DB((dbg, LEVEL_1, "antic_in iterations : %d\n", stats->antic_iterations));
148 DB((dbg, LEVEL_1, "insert iterations : %d\n", stats->insert_iterations));
149 DB((dbg, LEVEL_1, "infinite loops : %d\n", stats->infinite_loops));
150 DB((dbg, LEVEL_1, "fully redundant : %d\n", stats->fully));
151 DB((dbg, LEVEL_1, "partially redundant : %d\n", stats->partially));
152 DB((dbg, LEVEL_1, " loads : %d\n", stats->loads));
153 DB((dbg, LEVEL_1, " Divs/Mods : %d\n", stats->divmods));
154 DB((dbg, LEVEL_1, " hoist high : %d\n", stats->hoist_high));
155 DB((dbg, LEVEL_1, " first iteration : %d\n", stats->first_iter_found));
158 #define set_stats(var, value) (var)=(value)
159 #define inc_stats(var) ((var)+=1)
161 /* --------------------------------------------------------
163 * --------------------------------------------------------
169 * @param set the set to dump
170 * @param txt a text to describe the set
171 * @param block the owner block of the set
173 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
175 ir_valueset_iterator_t iter;
176 ir_node *value, *expr;
179 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
181 foreach_valueset(set, value, expr, iter) {
183 DB((dbg, LEVEL_2, "\n"));
185 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
187 DB((dbg, LEVEL_2, " %+F,", expr));
190 DB((dbg, LEVEL_2, "\n}\n"));
191 } /* dump_value_set */
194 * Dump all exp_gen value sets.
196 * @param list the list of block infos to retrieve the sets from
198 static void dump_all_expgen_sets(block_info *list)
200 block_info *block_info;
202 for (block_info = list; block_info != NULL; block_info = block_info->next) {
203 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
209 #define dump_all_expgen_sets(list)
210 #define dump_value_set(set, txt, block)
212 #endif /* DEBUG_libfirm */
214 /* --------------------------------------------------------
216 * --------------------------------------------------------
220 * Compares node collisions in valuetable.
221 * Modified identities_cmp().
223 static int compare_gvn_identities(const void *elt, const void *key)
225 ir_node *a = (ir_node *)elt;
226 ir_node *b = (ir_node *)key;
229 if (a == b) return 0;
231 /* phi nodes kill predecessor values and are always different */
232 if (is_Phi(a) || is_Phi(b))
235 /* memops are not the same, even if we want optimize them
236 we have to take the order in account */
237 if (is_memop(a) || is_memop(b)) {
238 /* Loads with the same predecessors are the same value;
239 this should only happen after phi translation. */
240 if (! is_Load(a) || ! is_Load(b))
244 if ((get_irn_op(a) != get_irn_op(b)) ||
245 (get_irn_mode(a) != get_irn_mode(b))) return 1;
247 /* compare if a's in and b's in are of equal length */
248 irn_arity_a = get_irn_arity(a);
249 if (irn_arity_a != get_irn_arity(b))
252 /* blocks are never the same */
253 if (is_Block(a) || is_Block(b))
256 /* should only be used with gcse enabled */
257 assert(get_opt_global_cse());
259 /* compare a->in[0..ins] with b->in[0..ins] */
260 for (i = 0; i < irn_arity_a; ++i) {
261 ir_node *pred_a = get_irn_n(a, i);
262 ir_node *pred_b = get_irn_n(b, i);
263 if (pred_a != pred_b) {
264 if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
270 * here, we already now that the nodes are identical except their
273 if (a->op->ops.node_cmp_attr)
274 return a->op->ops.node_cmp_attr(a, b);
280 * Identify does a lookup in the GVN valuetable.
281 * To be used when no new GVN values are to be created.
283 * @param e a node representing an expression
284 * @return a node representing the value
286 static ir_node *identify(ir_node *irn)
288 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
291 /* irn represents a new value, so return the leader */
292 return identify_remember(irn);
296 * remember() adds node irn to the GVN valuetable.
297 * Identify_remember only identifies values of nodes with the
298 * same predecessor nodes (not values). By creating a node from the predecessor
299 * values/leaders, a true valuetree is built. Phis kill their predecessor value,
300 * so no circular dependencies need to be resolved.
303 * Maybe this could be implemented with a custom node hash that takes
304 * phi nodes and true values (instead of predecessors) into account,
305 * resulting in value numbers.
306 * TODO This unnecessarily also handles nodes like calls, which are never equal.
308 * @param irn a node representing an expression
309 * @return the value of the expression
311 static ir_node *remember(ir_node *irn)
313 int arity = get_irn_arity(irn);
316 ir_node **in = XMALLOCN(ir_node *, arity);
319 for (i = 0; i < arity; ++i) {
320 ir_node *pred = get_irn_n(irn, i);
321 /* value and leader at the same time */
322 ir_node *pred_value = identify(pred);
324 /* phi will be translated anyway, so kill the predecessor values
325 this also prevents circular dependencies */
327 /* every phi represents its own value */
332 /* predecessor is not its value representation/the leader */
333 if (pred != pred_value)
338 /* do not create new value for loads or else it will not
339 be found in avail_out during insert phase */
340 if (changed && ! is_Load(irn)) {
341 /* create representative for */
342 ir_node *nn = new_ir_node(
343 get_irn_dbg_info(irn),
345 get_nodes_block(irn),
350 copy_node_attr(environ->graph, irn, nn);
352 /* now the value can be determined because the
353 predecessors are the leaders */
354 value = identify_remember(nn);
356 value = identify_remember(irn);
360 DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
361 ir_nodehashmap_insert(&value_map, irn, value);
366 /** When the value map has been built we may lookup expressions
367 * and remember them if new.
369 static ir_node *identify_or_remember(ir_node *irn)
371 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
375 return remember(irn);
378 /* --------------------------------------------------------
380 * --------------------------------------------------------
384 * Allocate block info for block block.
386 * @param block the block
387 * @param env the environment
389 static void alloc_block_info(ir_node *block, pre_env *env)
391 block_info *info = OALLOC(env->obst, block_info);
393 set_irn_link(block, info);
394 info->exp_gen = ir_valueset_new(16);
395 info->avail_out = ir_valueset_new(16);
396 info->antic_in = ir_valueset_new(16);
397 info->antic_done = ir_valueset_new(16);
398 info->trans = XMALLOC(ir_nodehashmap_t);
399 ir_nodehashmap_init(info->trans);
401 info->new_set = NULL;
406 info->next = env->list;
408 } /* alloc_block_info */
410 static void free_block_info(block_info *block_info)
412 ir_valueset_del(block_info->exp_gen);
413 ir_valueset_del(block_info->avail_out);
414 ir_valueset_del(block_info->antic_in);
415 if (block_info->trans) {
416 ir_nodehashmap_destroy(block_info->trans);
417 free(block_info->trans);
419 if (block_info->new_set)
420 ir_valueset_del(block_info->new_set);
424 * Bottom up walker that ensures that every block gets a block info.
426 * @param irn the node
427 * @param ctx the environment
429 static void block_info_walker(ir_node *irn, void *ctx)
432 pre_env *env = (pre_env*)ctx;
433 alloc_block_info(irn, env);
438 * Returns the block info of a block.
440 static block_info *get_block_info(ir_node *block)
442 return (block_info*)get_irn_link(block);
445 /* --------------------------------------------------------
446 * Infinite loop analysis
447 * --------------------------------------------------------
451 * Walker to set block marks and loop links to 0.
453 static void clear_block_mark_loop_link(ir_node *block, void *env)
457 if (is_Block(block)) {
458 set_Block_mark(block, 0);
459 set_loop_link(get_irn_loop(block), NULL);
464 * Returns non-zero if block is part of real loop loop.
467 static unsigned in_loop(ir_node *block, ir_loop *loop)
469 ir_loop *l = get_irn_loop(block);
470 ir_loop *outer = get_irg_loop(environ->graph);
473 /* loop tree root is not a loop */
474 if (l == NULL || l == outer)
476 l = get_loop_outer_loop(l);
482 * Returns the outermost real loop of loop.
484 static ir_loop *get_loop_outermost(ir_loop *loop)
486 ir_loop *outer = get_irg_loop(environ->graph);
488 ir_loop *last = NULL;
492 l = get_loop_outer_loop(l);
498 * Topologic bottom-up walker sets links of infinite loops to non-zero.
499 * Block marks are used to flag blocks reachable (from end) on the one hand,
500 * on the other hand they are set if the block is not part of an infinite loop.
502 static void infinite_loop_walker(ir_node *block, void *env)
508 if (! is_Block(block))
511 /* start block has no predecessors */
512 if (block == environ->start_block)
515 arity = get_irn_arity(block);
517 /* block not part of a real loop: no infinite loop */
518 if (get_irn_loop(block) == get_irg_loop(environ->graph))
519 set_Block_mark(block, 1);
521 if (get_Block_mark(block)) {
522 /* reachable block: mark all cf predecessors */
523 for (i = 0; i < arity; ++i) {
524 ir_node *pred = get_Block_cfgpred_block(block, i);
527 set_Block_mark(pred, 1);
530 /* We are in a real loop and see an unreachable block. */
531 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
533 /* flag loop as infinite */
534 set_loop_link(outermost_loop, outermost_loop);
535 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
537 /* The cf predecessors are unreachable, but can never be part of
538 an infinite loop, because we just reached them. So we set the
539 blockmark to prevent triggering the infinite loop detection. */
541 /* passing information to the cf predecessors */
542 for (i = 0; i < arity; ++i) {
543 ir_node *pred = get_Block_cfgpred_block(block, i);
548 /* If our cf predecessor is in the same endless loop,
549 it is also unreachable. */
550 if (in_loop(pred, outermost_loop)) {
551 set_Block_mark(pred, 0);
553 /* When we leave the unreachable loop, we artificially
554 declare the cf predecessor reachable. */
555 set_Block_mark(pred, 1);
562 * Sets loop links of outermost infinite loops to non-zero.
564 static void analyse_loops(ir_graph *irg)
566 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
568 /* reset block mark and loop links */
569 irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
571 /* mark end block reachable */
572 set_Block_mark(get_irg_end_block(irg), 1);
573 irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
575 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
578 #if NO_INF_LOOPS || NO_INF_LOOPS2
580 * Returns non-zero if block is part of an infinite loop.
582 static unsigned is_in_infinite_loop(ir_node *block)
586 assert(is_Block(block));
587 loop = get_irn_loop(block);
590 loop = get_loop_outermost(loop);
592 return (get_loop_link(loop) != NULL);
598 /* --------------------------------------------------------
600 * --------------------------------------------------------
604 * Returns non-zero if a node is movable and a possible candidate for PRE.
606 static unsigned is_nice_value(ir_node *n)
608 ir_mode *mode = get_irn_mode(n);
614 if (is_Proj(n) && mode != mode_X && mode != mode_T)
623 return get_Load_volatility(n) == volatility_non_volatile;
626 if (get_irn_pinned(n) == op_pin_state_pinned)
629 if (! mode_is_data(mode)) {
630 if (! is_Div(n) && ! is_Mod(n))
637 * Checks if a node n is clean in block block for exp_gen.
640 * @param block the block
641 * @return non-zero value for clean node
643 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
650 if (! is_nice_value(n))
653 /* filter loads from antic_in */
654 if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
657 arity = get_irn_arity(n);
658 for (i = 0; i < arity; ++i) {
659 ir_node *pred = get_irn_n(n, i);
665 /* we only handle current block */
666 if (get_nodes_block(pred) != block)
669 if (! is_nice_value(pred))
672 value = identify(pred);
673 if (! ir_valueset_lookup(valueset, value))
681 * Topological walker puts nodes in top-down topological order into exp_gen set.
682 * Assumed to walk blockwise and nodewise topologically top-down.
684 * @param irn the node
685 * @param ctx the environment
687 static void topo_walker(ir_node *irn, void *ctx)
697 /* GVN step: remember the value. */
698 value = remember(irn);
700 /* values that are not in antic_in also dont't need to be in any other set */
701 if (! is_nice_value(irn))
704 if (is_irn_constlike(irn))
707 block = get_nodes_block(irn);
708 info = get_block_info(block);
710 ir_valueset_insert(info->avail_out, value, irn);
712 if (is_clean_in_block(irn, block, info->exp_gen)) {
713 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
715 ir_valueset_insert(info->exp_gen, value, irn);
719 /* --------------------------------------------------------
721 * --------------------------------------------------------
725 * Gets result of nodes phi translation into block.
727 * @param node the node
728 * @param block the target block
730 * @return a phi translation of node node into block block or NULL
732 static ir_node *get_translated(ir_node *block, ir_node *node)
734 if (is_irn_constlike(node))
737 return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
741 * Saves result of phi translation of node into predecessor
742 * at pos of block succ.
744 * @param node the node
745 * @param succ the successor of the translation target block
746 * @param pos the position of the predecessor block
747 * @param trans the translation result
750 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
752 if (is_irn_constlike(node))
754 /* insert or replace */
755 ir_nodehashmap_insert(map, node, trans);
759 /* Helper function to compare the values of pred and avail_pred. */
760 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
762 ir_node *avail_value = identify(avail_pred);
763 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
764 ir_node *trans_pred = get_translated(pred_block, pred);
767 if (trans_pred == NULL)
769 value = identify(trans_pred);
771 DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));
773 return (value == avail_value);
777 * Does phi translation for redundant Div/Mod nodes only.
778 * Returns NULL for non-redundant node, which needs to be phi translated.
780 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
782 ir_node *mem = get_memop_mem(divmod);
783 ir_node *trans = get_translated_pred(block, pos, mem);
788 /* no partial redundancy if this is a mode_M phi */
789 if (is_Proj(trans)) {
790 /* The last memory operation in predecessor block */
791 ir_node *avail_op = get_Proj_pred(trans);
793 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
794 unsigned left, right;
796 if (is_Div(avail_op)) {
797 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
798 get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
800 left = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
801 right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
806 } else if (is_Mod(avail_op)) {
807 if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
809 left = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
810 right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
823 * Translates an expression above a Phi.
825 * @param node the node
826 * @param block the block the node is translated into
827 * @param pos the input number of the destination block
829 * @return a node representing the translated value
831 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
836 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
841 if (get_nodes_block(node) == block)
842 return get_Phi_pred(node, pos);
843 /* this phi does not need translation */
846 arity = get_irn_arity(node);
849 if (is_Div(node) || is_Mod(node)) {
850 ir_node *avail_op = phi_translate_divmod(node, block, pos);
857 in = ALLOCANZ(ir_node *, arity);
859 /* A value has several representatives. The anti leader is chosen to be
860 the main representative. If we access a node as representative of a
861 value we always use the anti leader. The anti leader can be found by
862 antic_in(identify(node)). */
863 for (i = 0; i < arity; ++i) {
864 ir_node *pred = get_irn_n(node, i);
865 ir_node *value = identify(pred);
866 /* get leader for pred to lookup its translated value */
867 ir_node *leader = ir_valueset_lookup(leaderset, value);
873 /* we cannot find this value in antic_in, because the value
874 has (possibly) changed! */
875 pred_trans = get_translated(pred_block, leader);
877 DB((dbg, LEVEL_3, "trans %+F of %+F is %+F\n", leader, pred_block, pred_trans));
878 if (pred_trans == NULL) {
881 new_pred = pred_trans;
883 /* loads: in case of memory projection and load,
884 skip them and use the loads memory */
885 if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
886 ir_node *load = get_Proj_pred(pred_trans);
887 /* translated predecessor will be different */
891 /* Put new load under the adjacent loads mem
892 such that GVN may compare them. */
893 new_pred = get_Load_mem(load);
896 /* predecessor value changed, so translation is needed */
897 if (identify(new_pred) != identify(pred))
902 DB((dbg, LEVEL_4, "in %+F\n", new_pred));
909 DB((dbg, LEVEL_3, "Translate\n"));
912 pred_block = get_nodes_block(in[0]);
914 /* copy node to represent the new value.
915 We do not translate nodes that do not need translation,
916 so we use the newly created nodes as value representatives only.
917 Their block is not important, because we create new ones during
918 the insert node phase. */
920 get_irn_dbg_info(node),
927 /* We need the attribute copy here, because the Hash value of a
928 node might depend on it. */
929 copy_node_attr(environ->graph, node, nn);
930 /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
931 because it already uses availability. */
933 DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
938 * Block-walker, computes Antic_in(block).
939 * Builds a value tree out of the graph by translating values
942 * @param block the block
943 * @param ctx the walker environment
945 static void compute_antic(ir_node *block, void *ctx)
947 pre_env *env = (pre_env*)ctx;
948 block_info *succ_info;
954 ir_valueset_iterator_t iter;
957 /* filter blocks from topological walker */
958 if (! is_Block(block))
961 /* the end block has no successor */
962 if (block == env->end_block)
965 info = get_block_info(block);
967 size = ir_valueset_size(info->antic_in);
968 n_succ = get_Block_n_cfg_outs(block);
971 if (env->first_iter) {
973 /* keep antic_in of infinite loops empty */
974 if (! is_in_infinite_loop(block)) {
975 foreach_valueset(info->exp_gen, value, expr, iter) {
976 ir_valueset_insert(info->antic_in, value, expr);
980 foreach_valueset(info->exp_gen, value, expr, iter) {
981 ir_valueset_insert(info->antic_in, value, expr);
986 /* successor might have phi nodes */
987 if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
988 succ = get_Block_cfg_out(block, 0);
989 int pos = get_Block_cfgpred_pos(succ, block);
990 succ_info = get_block_info(succ);
992 /* initialize translated set */
993 if (env->first_iter) {
994 info->trans = XMALLOC(ir_nodehashmap_t);
995 ir_nodehashmap_init(info->trans);
998 foreach_valueset(succ_info->antic_in, value, expr, iter) {
999 ir_node *trans = get_translated(block, expr);
1000 ir_node *trans_value;
1004 trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
1005 /* create new value if necessary */
1006 trans_value = identify_or_remember(trans);
1008 DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1010 /* on value change (phi present) we need the translated node
1011 to represent the new value for possible further translation. */
1012 if (value != trans_value)
1017 // test only 1 translation, because more failed, because of uncleanliness of load
1018 if (is_clean_in_block(expr, block, info->antic_in) && ! is_Load(expr)) {
1020 /* TODO to be determined why nearly every spec benchmark fails */
1021 if (! is_in_infinite_loop(succ) || ! is_backedge(succ, pos)) {
1022 ir_valueset_replace(info->antic_in, trans_value, represent);
1025 ir_valueset_replace(info->antic_in, trans_value, represent);
1028 set_translated(info->trans, expr, represent);
1031 } else if (n_succ > 1) {
1033 ir_node *common = NULL;
1034 ir_node *succ0 = get_Block_cfg_out(block, 0);
1035 block_info *succ0_info = get_block_info(succ0);
1037 /* disjoint of antic_ins */
1038 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1039 /* iterate over remaining successors */
1040 for (i = 1; i < n_succ; ++i) {
1041 ir_node *succ = get_Block_cfg_out(block, i);
1042 block_info *succ_info = get_block_info(succ);
1044 /* value in antic_in? */
1045 common = ir_valueset_lookup(succ_info->antic_in, value);
1050 if (common && is_clean_in_block(expr, block, info->antic_in))
1051 ir_valueset_replace(info->antic_in, value, expr);
1056 DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1058 if (size != ir_valueset_size(info->antic_in))
1062 /* --------------------------------------------------------
1063 * Main algorithm Avail_out
1064 * --------------------------------------------------------
1068 * Computes Avail_out(block):
1070 * Avail_in(block) = Avail_out(dom(block))
1071 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1074 * This function must be called in the top-down topological order:
1075 * Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1077 * @param block the block
1078 * @param ctx walker context
1080 static void compute_avail_top_down(ir_node *block, void *ctx)
1082 pre_env *env = (pre_env*)ctx;
1085 if (block == env->end_block)
1088 info = get_block_info(block);
1090 /* Add all nodes from the immediate dominator.
1091 This ensures that avail_out contains the leader. */
1092 if (block != env->start_block) {
1093 ir_node *dom_block = get_Block_idom(block);
1094 block_info *dom_info = get_block_info(dom_block);
1097 ir_valueset_iterator_t iter;
1099 foreach_valueset(dom_info->avail_out, value, expr, iter)
1100 /* replace: use the leader from dominator, not local exp_gen */
1101 ir_valueset_replace(info->avail_out, value, expr);
1104 DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1107 /* --------------------------------------------------------
1108 * Main algorithm redundancy detection
1109 * --------------------------------------------------------
1113 * Returns a valid mode if the value of expr is a partially redundant value.
1115 * @param block the block
1116 * @param expr the expression
1118 * @return mode of the expression if it is partially redundant else NULL
1120 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1122 ir_node *first_avail = NULL;
1124 int arity = get_irn_arity(block);
1125 int fully_redundant = 1;
1126 int partially_redundant = 0;
1127 ir_mode *mode = NULL;
1129 DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1131 /* for each predecessor blocks */
1132 for (pos = 0; pos < arity; ++pos) {
1133 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1134 block_info *pred_info;
1135 ir_node *trans_expr;
1136 ir_node *trans_value;
1137 ir_node *avail_expr;
1139 pred_info = get_block_info(pred_block);
1140 trans_expr = get_translated(pred_block, expr);
1141 trans_value = identify(trans_expr);
1143 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1144 /* value might be available through a not yet existing constant */
1145 if (avail_expr == NULL && is_Const(trans_expr)) {
1146 /* limit range of new constants */
1147 ir_mode *cmode = get_irn_mode(trans_expr);
1148 ir_tarval *upper = new_tarval_from_long(127, cmode);
1149 ir_tarval *lower = new_tarval_from_long(-127, cmode);
1150 ir_tarval *c = get_Const_tarval(trans_expr);
1152 if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
1153 tarval_cmp(c, upper) != ir_relation_greater_equal) {
1156 avail_expr = trans_expr;
1160 DB((dbg, LEVEL_3, "avail_expr %+F trans_expr %+F\n", avail_expr, trans_expr));
1162 if (avail_expr == NULL) {
1163 pred_info->avail = trans_expr;
1164 pred_info->found = 0;
1165 fully_redundant = 0;
1167 /* expr is available, use the leader */
1168 pred_info->avail = avail_expr;
1169 pred_info->found = 1;
1170 mode = get_irn_mode(avail_expr);
1171 partially_redundant = 1;
1173 if (first_avail == NULL)
1174 first_avail = avail_expr;
1175 else if (first_avail != avail_expr)
1176 /* Multiple different expressions are available */
1177 fully_redundant = 0;
1179 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1184 /* value is redundant from last iteration,
1185 but has not been removed from antic_in (is not optimized) */
1186 if (! environ->first_iter && is_redundant(block, expr))
1190 /* If it is not the same value already existing along every predecessor
1191 and it is defined by some predecessor then it is partially redundant. */
1192 if (! partially_redundant || fully_redundant)
1198 * Updates the new_set of a block by adding the new_set of
1199 * the immediate dominating block.
1203 static void update_new_set(ir_node *block, ir_node *idom)
1207 ir_valueset_iterator_t iter;
1208 block_info *curr_info = get_block_info(block);
1209 block_info *idom_info = get_block_info(idom);
1212 DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1213 foreach_valueset(idom_info->new_set, value, expr, iter) {
1214 /* inherit new_set from immediate dominator */
1215 ir_valueset_insert(curr_info->new_set, value, expr);
1216 /* replace in avail_out */
1217 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1219 #ifdef DEBUG_libfirm
1221 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1223 } /* update_new_set */
1227 * Returns redundant flag of node irn in block block.
1229 static unsigned is_redundant(ir_node *block, ir_node *irn)
1234 /* TODO Needs to use a flag, because antic_done should only be used
1235 if node is finally processed by insert_nodes. */
1241 * Checks if hoisting irn is greedy.
1242 * Greedy hoisting means that there are non partially redundant nodes
1243 * hoisted. This happens if a partially redundant node has
1244 * non redundant predecessors.
1246 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1248 int block_arity = get_irn_arity(block);
1249 int arity = get_irn_arity(irn);
1251 block_info *info = get_block_info(block);
1253 /* As long as the predecessor values are available in all predecessor blocks,
1254 we can hoist this value. */
1255 for (pos = 0; pos < block_arity; ++pos) {
1256 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1257 block_info *pred_info = get_block_info(pred_block);
1259 for (i = 0; i < arity; ++i) {
1260 ir_node *pred = get_irn_n(irn, i);
1267 if (get_loop_depth(get_irn_loop(pred_block)) > get_loop_depth(get_irn_loop(get_nodes_block(irn))))
1270 if (is_Phi(pred) && get_nodes_block(pred) == block)
1273 DB((dbg, LEVEL_3, "pred %+F\n", pred));
1274 value = identify(pred);
1275 leader = ir_valueset_lookup(info->antic_in, value);
1278 DB((dbg, LEVEL_3, "lead %+F\n", leader));
1279 trans = get_translated(pred_block, leader);
1282 DB((dbg, LEVEL_3, "trans %+F\n", trans));
1287 trans_val = identify(trans);
1288 DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1289 if (is_irn_constlike(trans_val))
1291 avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1296 /* only optimize if predecessors have been optimized */
1297 if (ir_valueset_lookup(info->antic_done, value) == NULL)
1302 /* Try to reduce cut. Turned out to be not very effective. */
1303 /* TODO Use out edges. */
1304 if (get_irn_outs_computed(irn) && get_irn_n_outs(irn) > 1)
1313 * Perform insertion of partially redundant values.
1314 * For every block node, do the following:
1315 * 1. Propagate the NEW_SETS of the dominator into the current block.
1316 * If the block has multiple predecessors,
1317 * 2a. Iterate over the ANTIC expressions for the block to see if
1318 * any of them are partially redundant.
1319 * 2b. If so, insert them into the necessary predecessors to make
1320 * the expression fully redundant.
1321 * 2c. Insert a new Phi merging the values of the predecessors.
1322 * 2d. Insert the new Phi, and the new expressions, into the
1325 * @param block the block
1326 * @param ctx the walker environment
1328 static void insert_nodes_walker(ir_node *block, void *ctx)
1330 pre_env *env = (pre_env*)ctx;
1331 int arity = get_irn_arity(block);
1337 ir_valueset_iterator_t iter;
1343 if (! is_Block(block))
1346 /* ensure that even the start block has a new_set */
1347 info = get_block_info(block);
1349 ir_valueset_del(info->new_set);
1350 info->new_set = ir_valueset_new(16);
1352 if (block == env->start_block)
1355 DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1357 idom = get_Block_idom(block);
1358 update_new_set(block, idom);
1360 /* process only path joining blocks */
1366 stack = plist_new();
1367 foreach_valueset(info->antic_in, value, expr, iter) {
1368 /* inverse topologic */
1369 plist_insert_front(stack, expr);
1373 /* This is the main reason antic_in is preverred over antic_out;
1374 we may iterate over every anticipated value first and not
1375 over the predecessor blocks. */
1376 foreach_valueset(info->antic_in, value, expr, iter) {
1382 if (ir_valueset_lookup(info->antic_done, value))
1385 /* filter phi nodes from antic_in */
1389 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1391 /* A value computed in the dominator is totally redundant.
1392 Hence we have nothing to insert. */
1393 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1394 DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1395 DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1398 ir_valueset_insert(info->antic_done, value, expr);
1404 if (is_hoisting_greedy(expr, block)) {
1405 DB((dbg, LEVEL_2, "greedy\n"));
1410 mode = is_partially_redundant(block, expr, value);
1415 if (is_hoisting_greedy(expr, block)) {
1416 DB((dbg, LEVEL_2, "Better greed: greedy\n"));
1421 #if LOADS || DIVMODS
1422 /* save old mode_M phis to remove keepalive edges later */
1423 if (is_memop(expr)) {
1424 ir_node *mem = get_memop_mem(expr);
1425 if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1426 ir_nodeset_insert(env->keeps, mem);
1431 #ifdef DEBUG_libfirm
1432 if (! is_Proj(expr)) {
1433 if (env->first_iter)
1434 inc_stats(gvnpre_stats->first_iter_found);
1435 inc_stats(gvnpre_stats->partially);
1438 inc_stats(gvnpre_stats->loads);
1439 else if (is_Div(expr) || is_Mod(expr))
1440 inc_stats(gvnpre_stats->divmods);
1443 phi_in = XMALLOCN(ir_node *, arity);
1445 /* for each predecessor block */
1446 for (pos = 0; pos < arity; ++pos) {
1447 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1448 block_info *pred_info;
1450 pred_info = get_block_info(pred_block);
1452 if (! pred_info->found) {
1454 int node_arity = get_irn_arity(expr);
1455 ir_node **in = XMALLOCNZ(ir_node *, node_arity);
1458 ir_node *target_block = pred_block;
1460 for (i = 0; i < node_arity; ++i) {
1461 ir_node *pred = get_irn_n(expr, i);
1462 ir_node *value = identify(pred);
1468 DB((dbg, LEVEL_3, "pred %+F\n", pred));
1469 value = identify(pred);
1470 /* transform knowledge over anti leader into knowledge over leader */
1471 /* get leader for pred to lookup its translated value */
1472 leader = ir_valueset_lookup(info->antic_in, value);
1475 DB((dbg, LEVEL_3, "lead %+F\n", leader));
1477 trans = get_translated(pred_block, leader);
1480 DB((dbg, LEVEL_3, "trans %+F\n", trans));
1482 /* in case of phi, we are done */
1483 if (is_Phi(pred) && get_nodes_block(pred) == block) {
1488 /* handle load predecessor */
1489 if (is_Load(trans)) {
1494 trans_val = identify(trans);
1495 DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1496 /* constants are always available but not in avail set */
1497 if (is_irn_constlike(trans_val)) {
1503 In case of loads we need to make sure the hoisted
1504 loads are found despite their unique value. */
1505 avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1506 DB((dbg, LEVEL_3, "avail %+F\n", avail));
1508 assert(avail && "predecessor has to be available");
1513 target_block = get_nodes_block(in[0]);
1515 /* copy node to represent the new value.
1516 We do not translate nodes that do not need translation,
1517 so we use the newly created nodes as value representatives only.
1518 Their block is not important, because we create new ones during
1519 the insert node phase. */
1520 trans = new_ir_node(
1521 get_irn_dbg_info(expr),
1526 get_irn_arity(expr),
1529 /* We need the attribute copy here, because the Hash value of a
1530 node might depend on it. */
1531 copy_node_attr(environ->graph, expr, trans);
1532 new_value = identify_or_remember(trans);
1534 DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F(%+F) not available\n", trans, pred_block, expr, value));
1536 /* value is now available in target block through trans
1537 insert (not replace) because it has not been available */
1538 ir_valueset_insert(pred_info->avail_out, value, trans);
1539 ir_valueset_insert(pred_info->avail_out, new_value, trans);
1541 DB((dbg, LEVEL_4, "SET AVAIL value %+F trans %+F in %+F\n", value, trans, pred_block));
1542 DB((dbg, LEVEL_4, "SET AVAIL newvalue %+F trans %+F in %+F\n", new_value, trans, pred_block));
1544 phi_in[pos] = trans;
1546 /* value available */
1547 phi_in[pos] = pred_info->avail;
1549 DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1552 /* We do not connect tuples as they will be connected automatically
1553 by the corresponding projections. */
1554 if (get_irn_mode(expr) != mode_T) {
1556 phi = new_r_Phi(block, arity, phi_in, mode);
1557 DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1559 /* this value is now available through the new phi */
1560 ir_valueset_replace(info->avail_out, value, phi);
1561 ir_valueset_insert(info->new_set, value, phi);
1565 /* already optimized this value in this block */
1566 ir_valueset_insert(info->antic_done, value, expr);
1572 Better greed first determines which values are redundant
1573 and decides then which to take.
1574 insert_nodes needs to be split up for that. The cycle could be
1575 for each block: flag redundant nodes,
1576 use heuristic to adjust these flags (also consider antic_done),
1578 This way we could decide if we hoist a non redundant node,
1579 if all its successors are redundant.
1580 Or we might try to minimize the cut along hoisted nodes and their
1581 non redundant successors.
1584 plist_element_t *it;
1585 /* iterate in inverse topological order */
1586 foreach_plist(stack, it) {
1587 ir_node *irn = (ir_node *)plist_element_get_value(it);
1588 ir_node *block = get_nodes_block(irn);
1592 /* does irn only have redundant successors? */
1594 /* TODO we need to use edges */
1595 assert(get_irn_outs_computed(irn));
1597 for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
1598 ir_node *succ = get_irn_out(irn, j);
1600 /* if succ and irn are in the same block */
1601 if (get_nodes_block(succ) == block && is_redundant(block, succ)) {
1610 flag_redundant(irn, 1);
1619 static void update_new_set_walker(ir_node *block, void *ctx)
1621 pre_env *env = (pre_env*)ctx;
1623 if (! is_Block(block))
1625 if (block == env->start_block)
1628 update_new_set(block, get_Block_idom(block));
1632 * Domtree block walker to insert nodes into the highest
1636 static void hoist_high(ir_node *block, void *ctx)
1638 pre_env *env = (pre_env*)ctx;
1639 block_info *curr_info;
1640 ir_valueset_iterator_t iter;
1643 int arity = get_irn_arity(block);
1645 if (! is_Block(block))
1648 curr_info = get_block_info(block);
1650 if (curr_info->new_set)
1651 ir_valueset_del(curr_info->new_set);
1652 curr_info->new_set = ir_valueset_new(16);
1654 if (block == env->start_block)
1660 DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1662 /* foreach entry optimized by insert node phase */
1663 foreach_valueset(curr_info->antic_done, value, expr, iter) {
1666 if (is_memop(expr) || is_Proj(expr))
1669 DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
1671 /* visit hoisted expressions */
1672 for (pos = 0; pos < arity; ++pos) {
1673 /* standard target is predecessor block */
1674 ir_node *target = get_Block_cfgpred_block(block, pos);
1675 block_info *pred_info = get_block_info(target);
1677 ir_node *new_target;
1678 ir_node *trans_expr;
1679 ir_node *trans_value;
1683 unsigned nest_depth;
1684 block_info *dom_info;
1686 /* get phi translated value */
1687 trans_expr = get_translated(target, expr);
1688 trans_value = identify(trans_expr);
1689 avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1691 /* get the used expr on this path */
1693 /* TODO when does this happen? */
1697 avail_arity = get_irn_arity(avail);
1698 value = identify(avail);
1700 /* anticipation border */
1702 nest_depth = get_loop_depth(get_irn_loop(target));
1703 /* By using block (instead of target) as initial block,
1704 we only allow hoisting into a common block of
1705 both predecessor blocks. */
1708 while (dom && dom != environ->start_block) {
1710 dom = get_Block_idom(dom);
1711 dom_info = get_block_info(dom);
1712 DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
1714 /* TODO antic_in means hoistable above block,
1715 but we need 'hoistable into block'. */
1717 /* check if available node ist still anticipated and clean */
1718 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1719 DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
1723 nest_depth = get_loop_depth(get_irn_loop(dom));
1725 /* do not hoist into loops */
1726 if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
1727 DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
1728 /* not a suitable location */
1732 /* check if operands die */
1734 /* check for uses on current path*/
1735 for (i = 0; i < avail_arity; i++) {
1736 ir_node *pred = get_irn_n(avail, i);
1737 ir_node *pred_value = identify(pred);
1742 DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
1744 if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1745 DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
1750 /* check every successor */
1751 foreach_out_edge(pred, edge) {
1752 ir_node *succ = get_edge_src_irn(edge);
1753 DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
1755 /* check only successors on current path to end */
1756 if (block_dominates(dom, get_nodes_block(succ))) {
1757 ir_node *succ_value = identify(succ);
1759 /* Do we have another user than avail?
1760 Then predecessor is not dead after removal of avail. */
1761 if (succ_value != value) {
1762 DB((dbg, LEVEL_4, "fail: still used in %+F\n", succ));
1772 /* only try direct dominator */
1777 block_info *target_info = get_block_info(new_target);
1778 int nn_arity = get_irn_arity(avail);
1779 ir_node **in = XMALLOCN(ir_node *, nn_arity);
1783 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1784 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1786 for (i = 0; i < nn_arity; ++i) {
1787 ir_node *pred = get_irn_n(avail, i);
1788 ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
1793 get_irn_dbg_info(avail),
1797 get_irn_mode(avail),
1802 identify_or_remember(nn);
1803 /* NOTE: Nodes are inserted into a dominating block and should
1804 be available from this point on. Currently we do not push
1805 the availability information through during the walk. */
1806 ir_valueset_insert(target_info->new_set, value, nn);
1813 /* --------------------------------------------------------
1814 * Elimination of fully redundant nodes
1815 * --------------------------------------------------------
1819 * Walker which finds redundant nodes using avail_out sets
1820 * and exchanges them for existing ones.
1821 * We cannot change the graph here as this would affect
1822 * the hash values of the nodes.
1824 * @param irn the node
1825 * @param ctx the walker environment
1827 static void eliminate(ir_node *irn, void *ctx)
1829 pre_env *env = (pre_env*)ctx;
1831 if (! is_Block(irn)) {
1832 ir_node *block = get_nodes_block(irn);
1833 block_info *info = get_block_info(block);
1834 ir_node *value = identify(irn);
1836 if (value != NULL) {
1837 ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1840 if (expr && get_irn_idx(expr) <= env->last_idx)
1843 if (expr != NULL && expr != irn) {
1844 elim_pair *p = OALLOC(env->obst, elim_pair);
1848 p->next = env->pairs;
1849 if (get_irn_idx(expr) > env->last_idx)
1850 p->reason = FS_OPT_GVN_PARTLY;
1852 p->reason = FS_OPT_GVN_FULLY;
1854 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1861 * Do all the recorded changes and optimize
1862 * newly created Phi's.
1864 * @param pairs list of elimination pairs
1866 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1869 ir_nodeset_iterator_t iter;
1871 ir_node *end = environ->end_node;
1873 for (p = pairs; p != NULL; p = p->next) {
1874 /* might be already changed */
1875 p->new_node = skip_Id(p->new_node);
1878 /* happens with load nodes */
1879 if (p->new_node == p->old_node)
1882 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1884 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1885 * which we can optimize here */
1886 if (is_Phi(p->new_node)) {
1888 ir_node *res = NULL;
1890 for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1891 ir_node *pred = get_irn_n(p->new_node, i);
1893 if (pred != p->old_node) {
1902 exchange(p->new_node, res);
1906 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1908 exchange(p->old_node, p->new_node);
1911 /* remove keep alive edges of unused mode_M phis */
1912 foreach_ir_nodeset(keeps, m_phi, iter) {
1913 remove_End_keepalive(end, m_phi);
1915 } /* eliminate_nodes */
1917 /* --------------------------------------------------------
1919 * --------------------------------------------------------
1923 * Gvn_Pre algorithm.
1925 * @param irg the graph
1927 static void gvn_pre(ir_graph *irg, pre_env *env)
1929 unsigned antic_iter;
1930 unsigned insert_iter;
1932 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1934 /* allocate block info */
1935 irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1937 ir_nodehashmap_init(&value_map);
1939 /* generate exp_gen */
1940 irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1941 dump_all_expgen_sets(env->list);
1943 /* compute the avail_out sets for all blocks */
1944 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1946 /* compute the anticipated value sets for all blocks */
1948 env->first_iter = 1;
1950 /* antic_in passes */
1953 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1955 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1956 env->first_iter = 0;
1957 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1958 } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1960 DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1962 ir_nodeset_init(env->keeps);
1964 env->first_iter = 1;
1965 /* compute redundant expressions */
1968 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1970 /* TODO topologically top down would be better; fewer iterations. */
1971 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
1972 env->first_iter = 0;
1973 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1974 } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1975 DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1978 /* An attempt to reduce lifetime by hoisting already hoisted values
1979 even higher, if their operands die. */
1980 dom_tree_walk_irg(irg, hoist_high, NULL, env);
1981 /* update avail_out for elimination */
1982 dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
1985 /* TODO deactivate edges to prevent intelligent removal of nodes */
1986 edges_deactivate(environ->graph);
1988 /* last step: eliminate nodes */
1989 irg_walk_graph(irg, NULL, eliminate, env);
1990 eliminate_nodes(env->pairs, env->keeps);
1992 ir_nodeset_destroy(env->keeps);
1996 * Gvn_Pre pass for graph irg.
1998 * @param irg the graph
2000 void do_gvn_pre(ir_graph *irg)
2002 struct obstack obst;
2005 optimization_state_t state;
2006 block_info *block_info;
2008 /* bads and unreachables cause too much trouble with dominance
2009 loop info for endless loop detection
2010 no critical edges is GVN-PRE precondition
2012 assure_irg_properties(irg,
2013 IR_GRAPH_PROPERTY_NO_BADS
2014 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
2015 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
2016 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
2017 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
2018 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
2020 /* register a debug mask */
2021 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
2023 save_optimization_state(&state);
2024 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2026 edges_activate(irg);
2029 DEBUG_ONLY(init_stats();)
2031 /* setup environment */
2032 obstack_init(&obst);
2036 env.start_block = get_irg_start_block(irg);
2037 env.end_block = get_irg_end_block(irg);
2038 env.end_node = get_irg_end(irg);
2041 /* last index is no node */
2042 env.last_idx = get_irg_last_idx(irg) - 1;
2044 /* Detect and set links of infinite loops to non-zero. */
2047 /* Switch on GCSE. We need it to correctly compute
2048 the value of a node, which is independent from
2050 set_opt_global_cse(1);
2051 /* new_identities */
2052 if (irg->value_table != NULL)
2053 del_pset(irg->value_table);
2054 /* initially assumed nodes in pset are 512 */
2055 irg->value_table = new_pset(compare_gvn_identities, 512);
2057 /* do GVN-PRE pass */
2059 DEBUG_ONLY(print_stats();)
2061 /* clean up: delete all sets */
2062 for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2063 free_block_info(block_info);
2066 DEBUG_ONLY(free_stats();)
2067 ir_nodehashmap_destroy(&value_map);
2068 obstack_free(&obst, NULL);
2069 ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2071 /* Pin the graph again.
2072 This is needed due to the use of set_opt_global_cse(1) */
2073 set_irg_pinned(irg, op_pin_state_pinned);
2074 restore_optimization_state(&state);
2075 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
2077 /* TODO there seem to be optimizations that try to use the existing
2079 new_identities(irg);
2082 /* Creates an ir_graph pass for do_gvn_pre. */
2083 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2085 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);