2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
46 #include "irgraph_t.h"
51 /* suggested by GVN-PRE authors */
52 #define MAX_ANTIC_ITER 10
53 #define MAX_INSERT_ITER 3
55 /* Stops antic iteration from processing endless loops. */
56 #define IGNORE_INF_LOOPS 1
57 /* Stops antic information to flow over infinite loop backedge */
58 #define NO_INF_LOOPS 0
60 /* Attempt to reduce register pressure and reduce code size
65 /* Seamless implementation of handling loads and generally memory
66 dependent nodes with GVN-PRE. */
72 /* Only optimize node directly after phi node if node is only successor. */
75 /* GVN is very limited. This enables optimize_node during value recognition.
76 GVN is still very limited, since a+1+b+1 doesn't equal a+b+2.
77 TODO Broken for yet unknown reasons. */
78 #define OPTIMIZE_NODES 0
82 /* NIY Choose to be optimized nodes in a more sophisticated way
83 to reduce number of newly introduced phi nodes. */
84 #define BETTER_GREED 0
/** Additional per-block information required by the GVN-PRE sets. */
typedef struct block_info {
	ir_valueset_t *exp_gen;    /* this block's locally generated clean expressions */
	ir_valueset_t *avail_out;  /* values available at the end of the block */
	ir_valueset_t *antic_in;   /* clean anticipated values at block entry */
	ir_valueset_t *antic_done; /* keeps elements of antic_in after the insert-nodes phase */
	ir_valueset_t *new_set;    /* values newly made available by hoisting */
	ir_nodehashmap_t *trans;   /* phi-translated nodes, keyed by the original node */
	ir_node *avail;            /* saves the available node for the insert-node phase */
	int found;                 /* saves the kind of availability for the insert-node phase */
	ir_node *block;            /* the block this block_info belongs to */
	struct block_info *next;   /* links all instances for easy traversal/teardown */
/**
 * A pair of nodes to be exchanged.
 * We have to defer the exchange because there are still needed references
 * to the old node while the analysis sets are being used.
 */
typedef struct elim_pair {
	ir_node *old_node;      /* node that will be replaced */
	ir_node *new_node;      /* replacement for old_node */
	struct elim_pair *next; /* links all instances for easy traversal */
	int reason;             /* reason for the replacement (used for statistics/debug) */
/** environment for the GVN-PRE algorithm */
typedef struct pre_env {
	ir_graph *graph;        /* current graph */
	struct obstack *obst;   /* obstack to allocate on */
	ir_node *start_block;   /* start block of the current graph */
	ir_node *end_block;     /* end block of the current graph */
	ir_node *end_node;      /* end node of the current graph */
	block_info *list;       /* block_info list head */
	elim_pair *pairs;       /* elim_pair list head */
	ir_nodeset_t *keeps;    /* to-be-removed phis, collected to kill their keep-alive edges */
	unsigned last_idx;      /* last node index of the input graph (nodes above are new) */
	char changes;           /* flag for fixed-point iterations - non-zero if changes occurred */
	char first_iter;        /* non-zero for the first fixed-point iteration */
	int iteration;          /* iteration counter */
	pset *value_table;      /* standard value table (plain CSE identities) */
	pset *gvnpre_values;    /* GVN-PRE value table (uses compare_gvn_identities) */
133 static pre_env *environ;
135 /* custom GVN value map */
136 static ir_nodehashmap_t value_map;
138 /* debug module handle */
139 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
143 /* --------------------------------------------------------
145 * --------------------------------------------------------
/** Counters collected during the optimization, dumped by print_stats(). */
typedef struct gvnpre_statistics {
	int first_iter_found;  /* redundancies found during the first fixed-point iteration */
	int antic_iterations;  /* number of antic_in fixed-point iterations */
	int insert_iterations; /* number of insert-node fixed-point iterations */

gvnpre_statistics *gvnpre_stats = NULL;
/* Allocates and zero-initializes the global statistics record. */
static void init_stats()
	gvnpre_stats = XMALLOCZ(gvnpre_statistics);
/* Tears down the global statistics record (presumably frees gvnpre_stats
   and resets the pointer -- body not visible here, TODO confirm). */
static void free_stats()
/* Dumps all collected counters to the debug output at LEVEL_1. */
static void print_stats()
	gvnpre_statistics *stats = gvnpre_stats;
	DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
	DB((dbg, LEVEL_1, "antic_in iterations : %d\n", stats->antic_iterations));
	DB((dbg, LEVEL_1, "insert iterations : %d\n", stats->insert_iterations));
	DB((dbg, LEVEL_1, "infinite loops : %d\n", stats->infinite_loops));
	DB((dbg, LEVEL_1, "fully redundant : %d\n", stats->fully));
	DB((dbg, LEVEL_1, "partially redundant : %d\n", stats->partially));
	DB((dbg, LEVEL_1, " loads : %d\n", stats->loads));
	DB((dbg, LEVEL_1, " Divs/Mods : %d\n", stats->divmods));
	DB((dbg, LEVEL_1, " hoist high : %d\n", stats->hoist_high));
	DB((dbg, LEVEL_1, " first iteration : %d\n", stats->first_iter_found));
189 #define set_stats(var, value) (var)=(value)
190 #define inc_stats(var) ((var)+=1)
192 /* --------------------------------------------------------
194 * --------------------------------------------------------
/**
 * Dumps a value set (debug builds only).
 *
 * @param set   the set to dump
 * @param txt   a text to describe the set
 * @param block the owner block of the set
 */
static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
	ir_valueset_iterator_t iter;
	ir_node *value, *expr;

	DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
	foreach_valueset(set, value, expr, iter) {
		DB((dbg, LEVEL_2, "\n"));
		/* expression and its value printed together when they differ */
		DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
		DB((dbg, LEVEL_2, " %+F,", expr));
	DB((dbg, LEVEL_2, "\n}\n"));
} /* dump_value_set */
/**
 * Dump all exp_gen value sets.
 *
 * @param list the list of block infos to retrieve the sets from
 */
static void dump_all_expgen_sets(block_info *list)
	block_info *block_info;

	/* walk the linked list of all block infos */
	for (block_info = list; block_info != NULL; block_info = block_info->next) {
		dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
240 #define dump_all_expgen_sets(list)
241 #define dump_value_set(set, txt, block)
243 #endif /* DEBUG_libfirm */
245 /* --------------------------------------------------------
247 * --------------------------------------------------------
/**
 * Compares node collisions in the value table.
 * Modified identities_cmp(): Phis and memops get special treatment.
 *
 * @return 0 if the nodes represent the same value, non-zero otherwise
 */
static int compare_gvn_identities(const void *elt, const void *key)
	ir_node *a = (ir_node *)elt;
	ir_node *b = (ir_node *)key;

	if (a == b) return 0;

	/* phi nodes kill predecessor values and are always different */
	if (is_Phi(a) || is_Phi(b))

	/* memops are not the same, even if we want to optimize them
	   we have to take the order into account */
	if (is_memop(a) || is_memop(b) ||
	    get_irn_mode(a) == mode_T ||
	    get_irn_mode(b) == mode_T) {
		/* Loads with the same predecessors are the same value;
		   this should only happen after phi translation. */
		if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))

	/* different op or mode: definitely different values */
	if ((get_irn_op(a) != get_irn_op(b)) ||
	    (get_irn_mode(a) != get_irn_mode(b))) return 1;

	/* compare if a's in and b's in are of equal length */
	irn_arity_a = get_irn_arity(a);
	if (irn_arity_a != get_irn_arity(b))

	/* blocks are never the same */
	if (is_Block(a) || is_Block(b))

	/* should only be used with gcse enabled */
	assert(get_opt_global_cse());

	/* compare a->in[0..ins] with b->in[0..ins] */
	for (i = 0; i < irn_arity_a; ++i) {
		ir_node *pred_a = get_irn_n(a, i);
		ir_node *pred_b = get_irn_n(b, i);
		if (pred_a != pred_b) {
			/* CSE-neutral predecessors may still be equal; others differ */
			if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))

	/*
	 * here, we already know that the nodes are identical except their
	 * attributes
	 */
	if (a->op->ops.node_cmp_attr)
		return a->op->ops.node_cmp_attr(a, b);
/**
 * Does a lookup in the GVN value table.
 * To be used when no new GVN values are to be created.
 *
 * @param irn a node representing an expression
 * @return a node representing the value
 */
static ir_node *identify(ir_node *irn)
	ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);

	/* irn represents a new value, so return the leader */
	return identify_remember(irn);
/**
 * remember() adds node irn to the GVN value table.
 * identify_remember() only identifies values of nodes with the
 * same predecessor nodes (not values). By creating a node from the predecessor
 * values/leaders, a true value tree is built. Phis kill their predecessor value,
 * so no circular dependencies need to be resolved.
 *
 * Maybe this could be implemented with a custom node hash that takes
 * phi nodes and true values (instead of predecessors) into account,
 * resulting in value numbers.
 * TODO This unnecessarily also handles nodes like calls, which are never equal.
 *
 * @param irn a node representing an expression
 * @return the value of the expression
 */
static ir_node *remember(ir_node *irn)
	int arity = get_irn_arity(irn);
	ir_node **in = XMALLOCN(ir_node *, arity);

	/* build the in-array of value leaders */
	for (i = 0; i < arity; ++i) {
		ir_node *pred = get_irn_n(irn, i);
		/* value and leader at the same time */
		ir_node *pred_value = identify(pred);
		/* phi will be translated anyway, so kill the predecessor values
		   this also prevents circular dependencies */
		/* every phi represents its own value */
		/* predecessor is not its value representation/the leader */
		if (pred != pred_value)

	if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
		/* create a representative node whose inputs are the value leaders */
		ir_node *nn = new_ir_node(
			get_irn_dbg_info(irn),
			get_nodes_block(irn),
		copy_node_attr(environ->graph, irn, nn);
		/* TODO optimize_node() uses the valuetable and thus the custom
		   identify_cmp() and will fail trying.
		   Temporarily swap in the standard value table for optimize_node(). */
		environ->graph->value_table = environ->value_table;
		nn = optimize_node(nn);
		environ->graph->value_table = environ->gvnpre_values;

		/* now the value can be determined because the
		   predecessors are the leaders */
		value = identify_remember(nn);
		value = identify_remember(irn);

	DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
	ir_nodehashmap_insert(&value_map, irn, value);
/**
 * When the value map has been built we may look up expressions
 * and remember them if new.
 *
 * @param irn a node representing an expression
 * @return the value of the expression
 */
static ir_node *identify_or_remember(ir_node *irn)
	ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);

	/* not yet in the map: create/record the value */
	return remember(irn);
418 /* --------------------------------------------------------
420 * --------------------------------------------------------
/**
 * Allocates and initializes the block info for the given block and
 * attaches it via the block's link field.
 *
 * @param block the block
 * @param env   the environment
 */
static void alloc_block_info(ir_node *block, pre_env *env)
	block_info *info = OALLOC(env->obst, block_info);

	set_irn_link(block, info);
	info->exp_gen = ir_valueset_new(16);
	info->avail_out = ir_valueset_new(16);
	info->antic_in = ir_valueset_new(16);
	info->antic_done = ir_valueset_new(16);
	info->trans = XMALLOC(ir_nodehashmap_t);
	ir_nodehashmap_init(info->trans);
	info->new_set = NULL;
	/* prepend to the global list of block infos for easy teardown */
	info->next = env->list;
} /* alloc_block_info */
450 static void free_block_info(block_info *block_info)
452 ir_valueset_del(block_info->exp_gen);
453 ir_valueset_del(block_info->avail_out);
454 ir_valueset_del(block_info->antic_in);
455 if (block_info->trans) {
456 ir_nodehashmap_destroy(block_info->trans);
457 free(block_info->trans);
459 if (block_info->new_set)
460 ir_valueset_del(block_info->new_set);
/**
 * Bottom-up walker that ensures that every block gets a block info.
 *
 * @param irn the node
 * @param ctx the environment
 */
static void block_info_walker(ir_node *irn, void *ctx)
	pre_env *env = (pre_env*)ctx;
	alloc_block_info(irn, env);
/**
 * Returns the block info of a block (stored in the block's link field).
 */
static block_info *get_block_info(ir_node *block)
	return (block_info*)get_irn_link(block);
485 /* --------------------------------------------------------
486 * Infinite loop analysis
487 * --------------------------------------------------------
/**
 * Walker that resets block marks and loop links to 0/NULL
 * before the infinite-loop analysis runs.
 */
static void clear_block_mark_loop_link(ir_node *block, void *env)
	if (is_Block(block)) {
		set_Block_mark(block, 0);
		set_loop_link(get_irn_loop(block), NULL);
/**
 * Returns non-zero if block is part of the real loop loop.
 * Walks up the loop tree from the block's innermost loop.
 */
static unsigned in_loop(ir_node *block, ir_loop *loop)
	ir_loop *l = get_irn_loop(block);
	ir_loop *outer = get_irg_loop(environ->graph);

	/* loop tree root is not a loop */
	if (l == NULL || l == outer)
	l = get_loop_outer_loop(l);
/**
 * Returns the outermost real loop of loop
 * (the last loop before the loop-tree root).
 */
static ir_loop *get_loop_outermost(ir_loop *loop)
	ir_loop *outer = get_irg_loop(environ->graph);
	ir_loop *last = NULL;

	/* climb the loop tree until we would hit the root */
	l = get_loop_outer_loop(l);
/**
 * Topological bottom-up walker that sets loop links of infinite loops to non-zero.
 * Block marks are used to flag blocks reachable (from end) on the one hand;
 * on the other hand they are set if the block is not part of an infinite loop.
 */
static void infinite_loop_walker(ir_node *block, void *env)
	if (! is_Block(block))

	/* start block has no predecessors */
	if (block == environ->start_block)

	arity = get_irn_arity(block);

	/* block not part of a real loop: no infinite loop */
	if (get_irn_loop(block) == get_irg_loop(environ->graph))
		set_Block_mark(block, 1);

	if (get_Block_mark(block)) {
		/* reachable block: mark all cf predecessors */
		for (i = 0; i < arity; ++i) {
			ir_node *pred = get_Block_cfgpred_block(block, i);
			set_Block_mark(pred, 1);

		/* We are in a real loop and see an unreachable block. */
		ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));

		/* flag loop as infinite */
		set_loop_link(outermost_loop, outermost_loop);
		DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)

		/* The cf predecessors are unreachable, but can never be part of
		   an infinite loop, because we just reached them. So we set the
		   blockmark to prevent triggering the infinite loop detection. */

		/* passing information to the cf predecessors */
		for (i = 0; i < arity; ++i) {
			ir_node *pred = get_Block_cfgpred_block(block, i);

			/* If our cf predecessor is in the same endless loop,
			   it is also unreachable. */
			if (in_loop(pred, outermost_loop)) {
				set_Block_mark(pred, 0);
				/* When we leave the unreachable loop, we artificially
				   declare the cf predecessor reachable. */
				set_Block_mark(pred, 1);
/**
 * Sets loop links of outermost infinite loops to non-zero.
 * Uses block marks as scratch state (reserved for the duration of the walk).
 */
static void analyse_loops(ir_graph *irg)
	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);

	/* reset block mark and loop links */
	irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);

	/* mark end block reachable */
	set_Block_mark(get_irg_end_block(irg), 1);
	irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);

	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
618 #if IGNORE_INF_LOOPS || NO_INF_LOOPS
/**
 * Returns non-zero if block is part of an infinite loop
 * (i.e. its outermost loop was flagged by infinite_loop_walker()).
 */
static unsigned is_in_infinite_loop(ir_node *block)
	assert(is_Block(block));
	loop = get_irn_loop(block);

	loop = get_loop_outermost(loop);
	return (get_loop_link(loop) != NULL);
638 /* --------------------------------------------------------
640 * --------------------------------------------------------
/**
 * Returns non-zero if a node is movable and a possible candidate for PRE.
 */
static unsigned is_nice_value(ir_node *n)
	ir_mode *mode = get_irn_mode(n);

#if LOADS || OLD_DIVMODS || DIVMODS
	/* result projections of handled memops are candidates, too */
	if (is_Proj(n) && mode != mode_X && mode != mode_T)
	/* only non-volatile memory operations may be moved */
	return get_Load_volatility(n) == volatility_non_volatile;
	return get_Store_volatility(n) == volatility_non_volatile;

	if (get_irn_pinned(n) == op_pin_state_pinned)

	if (! mode_is_data(mode)) {
		if (! is_Div(n) && ! is_Mod(n))
/**
 * Checks if a node n is clean in block block for exp_gen:
 * n is a candidate and all its relevant predecessors in the block
 * are themselves candidates with known values.
 *
 * @param n        the node
 * @param block    the block
 * @param valueset the set the predecessor values must be found in
 * @return non-zero value for clean node
 */
static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
	if (! is_nice_value(n))

	/* filter loads with no phi predecessor from antic_in */
	if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
	if (is_Store(n) && ! is_Phi(get_Store_mem(n)))

	ir_node *mem = get_Div_mem(n);
	if (! is_Phi(mem) && ! is_NoMem(mem))
	if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))

	arity = get_irn_arity(n);
	for (i = 0; i < arity; ++i) {
		ir_node *pred = get_irn_n(n, i);

		/* we only handle current block */
		if (get_nodes_block(pred) != block)

		if (! is_nice_value(pred))

		/* the predecessor's value must already be in the given set */
		value = identify(pred);
		if (! ir_valueset_lookup(valueset, value))
/**
 * Topological walker puts nodes in top-down topological order into exp_gen set.
 * Assumed to walk blockwise and nodewise topologically top-down.
 *
 * @param irn the node
 * @param ctx the environment
 */
static void topo_walker(ir_node *irn, void *ctx)
	/* keep the standard value table in sync (see OPTIMIZE_NODES) */
	environ->graph->value_table = environ->value_table;
	identify_remember(irn);
	environ->graph->value_table = environ->gvnpre_values;

	/* GVN step: remember the value. */
	value = remember(irn);

	/* constants need no availability bookkeeping */
	if (is_irn_constlike(irn))

	block = get_nodes_block(irn);
	info = get_block_info(block);

	/* control flow results are not values */
	if (get_irn_mode(irn) != mode_X)
		ir_valueset_insert(info->avail_out, value, irn);

	/* values that are not in antic_in also don't need to be in any other set */
	if (! is_nice_value(irn))

	if (is_clean_in_block(irn, block, info->exp_gen)) {
		DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
		ir_valueset_insert(info->exp_gen, value, irn);
787 /* --------------------------------------------------------
789 * --------------------------------------------------------
/**
 * Gets the result of node's phi translation into block.
 * Constants translate to themselves.
 *
 * @param block the target block
 * @param node  the node
 *
 * @return a phi translation of node node into block block or NULL
 */
static ir_node *get_translated(ir_node *block, ir_node *node)
	if (is_irn_constlike(node))

	return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
/**
 * Saves the result of a phi translation of a node into the
 * given translation map. Constants are never recorded.
 *
 * @param map   the translation map of the block
 * @param node  the node
 * @param trans the translation result
 */
static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
	if (is_irn_constlike(node))
	/* insert or replace */
	ir_nodehashmap_insert(map, node, trans);
/* Helper function to compare the values of pred and avail_pred:
   compares avail_pred's value against the value of pred translated into
   the predecessor block at position pos. Returns non-zero on a match. */
static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
	ir_node *avail_value = identify(avail_pred);
	ir_node *pred_block = get_Block_cfgpred_block(block, pos);
	ir_node *trans_pred = get_translated(pred_block, pred);

	if (trans_pred == NULL)
	value = identify(trans_pred);

	DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));

	return (value == avail_value);
/**
 * Does phi translation for redundant Div/Mod nodes only.
 * Returns NULL for a non-redundant node, which then needs to be phi translated
 * the regular way.
 */
static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
	ir_node *mem = get_memop_mem(divmod);
	ir_node *trans = get_translated_pred(block, pos, mem);

	/* no partial redundancy if this is a mode_M phi */
	if (is_Proj(trans)) {
		/* The last memory operation in predecessor block */
		ir_node *avail_op = get_Proj_pred(trans);

		if (get_irn_op(divmod) == get_irn_op(avail_op)) {
			unsigned left, right;

			if (is_Div(avail_op)) {
				/* Div must match in result mode and remainder handling */
				if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
				    get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
					left = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
					right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
			} else if (is_Mod(avail_op)) {
				if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
					left = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
					right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
/**
 * Translates an expression above a Phi into the predecessor at pos.
 *
 * @param node      the node
 * @param block     the block the node is translated into
 * @param pos       the input number of the destination block
 * @param leaderset set used to look up the anti-leader of predecessor values
 *
 * @return a node representing the translated value
 */
static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
	ir_node *pred_block = get_Block_cfgpred_block(block, pos);

	/* a Phi in this very block simply selects its pos-th operand */
	if (get_nodes_block(node) == block)
		return get_Phi_pred(node, pos);
	/* this phi does not need translation */

	arity = get_irn_arity(node);

	if (is_Div(node) || is_Mod(node)) {
		ir_node *avail_op = phi_translate_divmod(node, block, pos);

	in = ALLOCANZ(ir_node *, arity);

	/* A value has several representatives. The anti leader is chosen to be
	   the main representative. If we access a node as representative of a
	   value we always use the anti leader. The anti leader can be found by
	   antic_in(identify(node)). */
	for (i = 0; i < arity; ++i) {
		ir_node *pred = get_irn_n(node, i);
		ir_node *value = identify(pred);
		/* get leader for pred to lookup its translated value */
		ir_node *leader = ir_valueset_lookup(leaderset, value);

		/* we cannot find this value in antic_in, because the value
		   has (possibly) changed! */
		pred_trans = get_translated(pred_block, leader);

		ir_node *mem = get_Div_mem(node);
		pred_trans = get_Div_mem(node);

		DB((dbg, LEVEL_3, "trans %+F of %+F is %+F\n", leader, pred_block, pred_trans));
		if (pred_trans == NULL) {
			new_pred = pred_trans;

		/* loads: Predecessor is a memory phi, which translated yields a proj or
		   another phi. In case of projection and a load predecessor,
		   skip them and use the loads memory. */
		if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
			ir_node *loadstore = get_Proj_pred(pred_trans);
			/* If we do not translate this node, we will get its value wrong. */
			if (is_Load(loadstore)) {
				/* Put new load under the adjacent loads memory edge
				   such that GVN may compare them. */
				new_pred = get_Load_mem(loadstore);
			} else if (is_Store(loadstore)) {
				new_pred = get_Store_mem(loadstore);

		/* predecessor value changed, so translation is needed */
		if (identify(new_pred) != identify(pred))

		DB((dbg, LEVEL_4, "in %+F\n", new_pred));

	DB((dbg, LEVEL_3, "Translate\n"));

	pred_block = get_nodes_block(in[0]);

	/* copy node to represent the new value.
	   We do not translate nodes that do not need translation,
	   so we use the newly created nodes as value representatives only.
	   Their block is not important, because we create new ones during
	   the insert node phase. */
		get_irn_dbg_info(node),

	/* We need the attribute copy here, because the Hash value of a
	   node might depend on it. */
	copy_node_attr(environ->graph, node, nn);
	/* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
	   because it already uses availability. */

	DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
/**
 * Block-walker, computes Antic_in(block).
 * Builds a value tree out of the graph by translating values
 * over phi nodes.
 *
 * @param block the block
 * @param ctx   the walker environment
 */
static void compute_antic(ir_node *block, void *ctx)
	pre_env *env = (pre_env*)ctx;
	block_info *succ_info;
	ir_valueset_iterator_t iter;

	/* filter blocks from topological walker */
	if (! is_Block(block))

	/* the end block has no successor */
	if (block == env->end_block)

	info = get_block_info(block);
	/* remember size to detect changes for the fixed point */
	size = ir_valueset_size(info->antic_in);
	n_succ = get_Block_n_cfg_outs(block);

	if (env->first_iter) {
#if IGNORE_INF_LOOPS
		/* keep antic_in of infinite loops empty */
		if (! is_in_infinite_loop(block)) {
			foreach_valueset(info->exp_gen, value, expr, iter) {
				ir_valueset_insert(info->antic_in, value, expr);
		foreach_valueset(info->exp_gen, value, expr, iter) {
			ir_valueset_insert(info->antic_in, value, expr);

	/* successor might have phi nodes */
	if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
		succ = get_Block_cfg_out(block, 0);
		int pos = get_Block_cfgpred_pos(succ, block);
		succ_info = get_block_info(succ);

		/* initialize translated set
		   NOTE(review): this overwrites the map allocated in
		   alloc_block_info() without destroying it -- looks like a leak;
		   confirm against the hidden surrounding code. */
		if (env->first_iter) {
			info->trans = XMALLOC(ir_nodehashmap_t);
			ir_nodehashmap_init(info->trans);

		foreach_valueset(succ_info->antic_in, value, expr, iter) {
			ir_node *trans = get_translated(block, expr);
			ir_node *trans_value;

			trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);

			/* create new value if necessary */
			trans_value = identify_or_remember(trans);

			DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));

			/* On value change (phi present) we need the translated node
			   to represent the new value for possible further translation. */
			if (value != trans_value)

			if (is_clean_in_block(expr, block, info->antic_in)) {
				/* Prevent information flow over the backedge of endless loops. */
				if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
					ir_valueset_replace(info->antic_in, trans_value, represent);
				ir_valueset_replace(info->antic_in, trans_value, represent);
			set_translated(info->trans, expr, represent);

	} else if (n_succ > 1) {
		ir_node *common = NULL;
		ir_node *succ0 = get_Block_cfg_out(block, 0);
		block_info *succ0_info = get_block_info(succ0);

		/* disjoint of antic_ins */
		foreach_valueset(succ0_info->antic_in, value, expr, iter) {
			/* iterate over remaining successors */
			for (i = 1; i < n_succ; ++i) {
				ir_node *succ = get_Block_cfg_out(block, i);
				block_info *succ_info = get_block_info(succ);

				/* value in antic_in? */
				common = ir_valueset_lookup(succ_info->antic_in, value);

			if (common && is_clean_in_block(expr, block, info->antic_in))
				ir_valueset_replace(info->antic_in, value, expr);

	DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)

	/* set size changed: another fixed-point iteration is needed */
	if (size != ir_valueset_size(info->antic_in))
1148 /* --------------------------------------------------------
1149 * Main algorithm Avail_out
1150 * --------------------------------------------------------
/**
 * Computes Avail_out(block):
 *
 *   Avail_in(block)  = Avail_out(dom(block))
 *   Avail_out(block) = Avail_in(block) \/ Nodes(block)
 *
 * This function must be called in the top-down topological order:
 * Then it computes Leader(Nodes(block)) instead of Nodes(block) !
 *
 * @param block the block
 * @param ctx   walker context
 */
static void compute_avail_top_down(ir_node *block, void *ctx)
	pre_env *env = (pre_env*)ctx;

	if (block == env->end_block)

	info = get_block_info(block);

	/* Add all nodes from the immediate dominator.
	   This ensures that avail_out contains the leader. */
	if (block != env->start_block) {
		ir_node *dom_block = get_Block_idom(block);
		block_info *dom_info = get_block_info(dom_block);
		ir_valueset_iterator_t iter;

		foreach_valueset(dom_info->avail_out, value, expr, iter)
			/* replace: use the leader from dominator, not local exp_gen */
			ir_valueset_replace(info->avail_out, value, expr);

	DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1193 /* --------------------------------------------------------
1194 * Main algorithm redundancy detection
1195 * --------------------------------------------------------
/**
 * Returns a valid mode if the value of expr is a partially redundant value.
 *
 * @param block the block
 * @param expr  the expression
 * @param value the value of the expression
 *
 * @return mode of the expression if it is partially redundant else NULL
 */
static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
	ir_node *first_avail = NULL;
	int arity = get_irn_arity(block);
	int fully_redundant = 1;
	int partially_redundant = 0;
	ir_mode *mode = NULL;

	DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));

	/* for each predecessor blocks */
	for (pos = 0; pos < arity; ++pos) {
		ir_node *pred_block = get_Block_cfgpred_block(block, pos);
		block_info *pred_info;
		ir_node *trans_expr;
		ir_node *trans_value;
		ir_node *avail_expr;

		pred_info = get_block_info(pred_block);
		trans_expr = get_translated(pred_block, expr);
		trans_value = identify(trans_expr);

		if (is_Const(trans_expr))
			avail_expr = trans_expr;
		avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);

		/* value might be available through a not yet existing constant */
		if (avail_expr == NULL && is_Const(trans_expr)) {
			/* limit range of new constants */
			ir_mode *cmode = get_irn_mode(trans_expr);
			ir_tarval *upper = new_tarval_from_long(127, cmode);
			ir_tarval *lower = new_tarval_from_long(-127, cmode);
			ir_tarval *c = get_Const_tarval(trans_expr);

			/* tarval within range? */
			if (tarval_cmp(lower, c) == ir_relation_less_equal &&
			    tarval_cmp(c, upper) == ir_relation_less_equal) {
				avail_expr = trans_expr;

		DB((dbg, LEVEL_3, "avail_expr %+F trans_expr %+F\n", avail_expr, trans_expr));

		if (avail_expr == NULL) {
			/* not available in this predecessor: remember what to insert */
			pred_info->avail = trans_expr;
			pred_info->found = 0;
			fully_redundant = 0;
			/* expr is available, use the leader */
			pred_info->avail = avail_expr;
			pred_info->found = 1;
			mode = get_irn_mode(avail_expr);
			partially_redundant = 1;

			if (first_avail == NULL)
				first_avail = avail_expr;
			else if (first_avail != avail_expr)
				/* Multiple different expressions are available,
				   This is why we need no cut over avail_out sets. */
				fully_redundant = 0;

			DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));

	/* value is redundant from last iteration,
	   but has not been removed from antic_in (is not optimized) */
	if (! environ->first_iter && is_redundant(block, expr))

	/* If it is not the same value already existing along every predecessor
	   and it is defined by some predecessor then it is partially redundant. */
	if (! partially_redundant || fully_redundant)
/**
 * Updates the new_set of a block by adding the new_set of
 * the immediate dominating block.
 *
 * @param block the block
 * @param idom  the immediate dominator of block
 */
static void update_new_set(ir_node *block, ir_node *idom)
	ir_valueset_iterator_t iter;
	block_info *curr_info = get_block_info(block);
	block_info *idom_info = get_block_info(idom);

	DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
	foreach_valueset(idom_info->new_set, value, expr, iter) {
		/* inherit new_set from immediate dominator */
		ir_valueset_insert(curr_info->new_set, value, expr);
		/* replace in avail_out */
		updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
#ifdef DEBUG_libfirm
		dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
} /* update_new_set */
/**
 * Returns the redundant flag of node irn in block block.
 */
static unsigned is_redundant(ir_node *block, ir_node *irn)
	/* TODO Needs to use a flag, because antic_done should only be used
	   if node is finally processed by insert_nodes. */
1333 * Checks if hoisting irn is greedy.
1334 * Greedy hoisting means that there are non partially redundant nodes
1335 * hoisted. This happens if a partially redundant node has
1336 * non redundant predecessors.
/* NOTE(review): several branch bodies (early returns/continues) are elided
   in this listing; the comments below describe only the visible lines. */
1338 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1340 int block_arity = get_irn_arity(block);
1341 int arity = get_irn_arity(irn);
1343 block_info *info = get_block_info(block);
1345 /* As long as the predecessor values are available in all predecessor blocks,
1346 we can hoist this value. */
/* Check every operand of irn against every predecessor block of 'block'. */
1347 for (pos = 0; pos < block_arity; ++pos) {
1348 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1349 block_info *pred_info = get_block_info(pred_block);
1351 for (i = 0; i < arity; ++i) {
1352 ir_node *pred = get_irn_n(irn, i);
1360 /* Very conservative min cut. Phi might only have 1 user. */
1361 if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
1365 if (is_Phi(pred) && get_nodes_block(pred) == block)
1368 DB((dbg, LEVEL_3, "pred %+F\n", pred));
1369 value = identify(pred);
1370 leader = ir_valueset_lookup(info->antic_in, value);
1373 DB((dbg, LEVEL_3, "lead %+F\n", leader));
/* Translate the anti-leader through the phi into the predecessor block. */
1374 trans = get_translated(pred_block, leader);
1377 DB((dbg, LEVEL_3, "trans %+F\n", trans));
1379 trans_val = identify(trans);
1380 DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1382 if (is_Const(trans_val) || is_SymConst(trans_val)) {
1383 /* existing constant */
/* Node index below last_idx means the constant existed before this pass. */
1384 if (get_irn_idx(trans_val) < environ->last_idx) {
1387 /* limit range of new constants */
1388 ir_mode *cmode = get_irn_mode(trans);
1389 ir_tarval *upper = new_tarval_from_long(128, cmode);
1390 ir_tarval *lower = new_tarval_from_long(-128, cmode);
1391 ir_tarval *c = get_Const_tarval(trans);
1393 /* tarval within range? */
/* Both comparisons are strict: only constants in the open interval
   (-128, 128) are accepted as cheap new constants. */
1394 if (tarval_cmp(lower, c) == ir_relation_less &&
1395 tarval_cmp(c, upper) == ir_relation_less) {
1404 if (is_irn_constlike(trans_val))
1407 avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1409 DB((dbg, LEVEL_3, "avail %+F\n", avail));
1413 /* only optimize if predecessors have been optimized */
1414 if (ir_valueset_lookup(info->antic_done, value) == NULL)
1423 * Perform insertion of partially redundant values.
1424 * For every block node, do the following:
1425 * 1. Propagate the NEW_SETS of the dominator into the current block.
1426 * If the block has multiple predecessors,
1427 * 2a. Iterate over the ANTIC expressions for the block to see if
1428 * any of them are partially redundant.
1429 * 2b. If so, insert them into the necessary predecessors to make
1430 * the expression fully redundant.
1431 * 2c. Insert a new Phi merging the values of the predecessors.
1432 * 2d. Insert the new Phi, and the new expressions, into the
1435 * @param block the block
1436 * @param ctx the walker environment
/* NOTE(review): many interior lines (guards, continues, closing braces) are
   elided in this listing; comments refer to the visible lines only. */
1438 static void insert_nodes_walker(ir_node *block, void *ctx)
1440 pre_env *env = (pre_env*)ctx;
1441 int arity = get_irn_arity(block);
1447 ir_valueset_iterator_t iter;
1453 if (! is_Block(block))
1456 /* ensure that even the start block has a new_set */
1457 info = get_block_info(block);
1459 ir_valueset_del(info->new_set);
1460 info->new_set = ir_valueset_new(16);
1462 if (block == env->start_block)
1465 DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1467 idom = get_Block_idom(block);
1468 update_new_set(block, idom);
1470 /* process only path joining blocks */
/* Snapshot antic_in into a list so the BETTER_GREED post-pass below can
   iterate the candidates in inverse topological order. */
1476 stack = plist_new();
1477 foreach_valueset(info->antic_in, value, expr, iter) {
1478 /* inverse topologic */
1479 plist_insert_front(stack, expr);
1483 /* This is the main reason antic_in is preferred over antic_out;
1484 we may iterate over every anticipated value first and not
1485 over the predecessor blocks. */
1486 foreach_valueset(info->antic_in, value, expr, iter) {
/* Skip values this block already finished processing. */
1492 if (ir_valueset_lookup(info->antic_done, value))
1495 /* filter phi nodes from antic_in */
1499 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1501 /* A value computed in the dominator is totally redundant.
1502 Hence we have nothing to insert. */
1503 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1504 DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1505 DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1507 ir_valueset_insert(info->antic_done, value, expr);
1512 if (is_hoisting_greedy(expr, block)) {
1513 DB((dbg, LEVEL_2, "greedy\n"));
/* is_partially_redundant() also fills in pred_info->avail/found per
   predecessor and returns the mode for the merging Phi. */
1518 mode = is_partially_redundant(block, expr, value);
1523 if (is_hoisting_greedy(expr, block)) {
1524 DB((dbg, LEVEL_2, "Better greed: greedy\n"));
1529 #if LOADS || OLD_DIVMODS || DIVMODS
1530 /* save old mode_M phis to remove keepalive edges later */
1531 if (is_memop(expr)) {
1532 ir_node *mem = get_memop_mem(expr);
1533 if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1534 ir_nodeset_insert(env->keeps, mem);
1539 #ifdef DEBUG_libfirm
1540 if (! is_Proj(expr)) {
1541 if (env->first_iter)
1542 inc_stats(gvnpre_stats->first_iter_found);
1543 inc_stats(gvnpre_stats->partially);
1545 if (is_Load(expr) || is_Store(expr))
1546 inc_stats(gvnpre_stats->loads);
1547 else if (is_Div(expr) || is_Mod(expr))
1548 inc_stats(gvnpre_stats->divmods);
/* One Phi input per control-flow predecessor of the block. */
1551 phi_in = XMALLOCN(ir_node *, arity);
1553 /* for each predecessor block */
1554 for (pos = 0; pos < arity; ++pos) {
1555 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1556 block_info *pred_info;
1558 pred_info = get_block_info(pred_block);
/* Value missing on this path: materialize a copy of expr there. */
1560 if (! pred_info->found) {
1562 int node_arity = get_irn_arity(expr);
1563 ir_node **in = XMALLOCNZ(ir_node *, node_arity);
1565 ir_node *new_value, *new_value2;
1566 ir_node *target_block = pred_block;
1568 for (i = 0; i < node_arity; ++i) {
1569 ir_node *pred = get_irn_n(expr, i);
1570 ir_node *value = identify(pred);
1576 /* transform knowledge over the predecessor from
1577 anti-leader world into leader world. */
1579 DB((dbg, LEVEL_3, "pred %+F\n", pred));
/* NOTE(review): identify(pred) was already computed into 'value' at
   line 1570; this recompute looks redundant — confirm against the
   elided lines in between. */
1580 value = identify(pred);
1582 /* get leader for pred to lookup its translated value */
1583 leader = ir_valueset_lookup(info->antic_in, value);
1586 DB((dbg, LEVEL_3, "lead %+F\n", leader));
1588 trans = get_translated(pred_block, leader);
1591 DB((dbg, LEVEL_3, "trans %+F\n", trans));
1593 /* in case of phi, we are done */
1594 if (is_Phi(pred) && get_nodes_block(pred) == block) {
1599 trans_val = identify(trans);
1600 DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1602 /* constants are always available but not in avail set */
1603 if (is_irn_constlike(trans_val)) {
1609 In case of loads we need to make sure the hoisted
1610 loads are found despite their unique value. */
1611 avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1612 DB((dbg, LEVEL_3, "avail %+F\n", avail));
1614 assert(avail && "predecessor has to be available");
1619 target_block = get_nodes_block(in[0]);
1621 /* Copy node to represent the new value.
1622 We use translated nodes as value representatives only.
1623 They have anti leaders as predecessors, not leaders!
1624 So we have to create a new node using leaders. */
1625 trans = new_ir_node(
1626 get_irn_dbg_info(expr),
1631 get_irn_arity(expr),
1634 /* We need the attribute copy here, because the Hash value of a
1635 node might depend on it. */
1636 copy_node_attr(environ->graph, expr, trans);
1638 /* value is now available in target block through trans
1639 insert (not replace) because it has not been available */
1640 new_value = identify_or_remember(trans);
1641 ir_valueset_insert(pred_info->avail_out, new_value, trans);
1642 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
/* Also register the new node under the translated value of expr itself,
   so later lookups through either value find it. */
1644 new_value2 = identify(get_translated(pred_block, expr));
1645 ir_valueset_insert(pred_info->avail_out, new_value2, trans);
1646 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
1648 DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
1650 phi_in[pos] = trans;
1652 /* value available */
1653 phi_in[pos] = pred_info->avail;
1655 DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1658 /* We do not connect tuples as they will be connected automatically
1659 by the corresponding projections. */
1660 if (get_irn_mode(expr) != mode_T) {
1662 phi = new_r_Phi(block, arity, phi_in, mode);
1663 DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1665 /* This value is now available through the new phi.
1666 insert || replace in avail_out */
1667 ir_valueset_replace(info->avail_out, value, phi);
1668 ir_valueset_insert(info->new_set, value, phi);
1672 /* already optimized this value in this block */
1673 ir_valueset_insert(info->antic_done, value, expr);
1679 Better greed first determines which values are redundant
1680 and decides then which to take.
1681 insert_nodes needs to be split up for that. The cycle could be
1682 for each block: flag redundant nodes,
1683 use heuristic to adjust these flags (also consider antic_done),
1685 This way we could decide if we should hoist a non redundant node,
1686 if all its successors are redundant.
1687 Or we might try to minimize the cut along hoisted nodes and their
1688 non redundant successors.
1691 plist_element_t *it;
1692 /* iterate in inverse topological order */
1693 foreach_plist(stack, it) {
1694 ir_node *irn = (ir_node *)plist_element_get_value(it);
/* NOTE(review): this local 'block' shadows the walker parameter 'block';
   works here but worth renaming when the file is next touched. */
1695 ir_node *block = get_nodes_block(irn);
1699 /* does irn only have redundant successors? */
1701 foreach_out_edge(irn, edge) {
1702 ir_node *succ = get_edge_src_irn(edge);
1704 /* if succ and irn are in the same block */
1705 if (get_nodes_block(succ) == block && is_redundant(block, succ)) {
1714 flag_redundant(irn, 1);
/* Walker: propagate the new_set of the immediate dominator into each block
   (skips non-block nodes and the start block, which has no dominator). */
1723 static void update_new_set_walker(ir_node *block, void *ctx)
1725 pre_env *env = (pre_env*)ctx;
1727 if (! is_Block(block))
1729 if (block == env->start_block)
1732 update_new_set(block, get_Block_idom(block));
1736 * Domtree block walker to insert nodes with dying operands
1737 * into the highest possible block whilst still being anticipated.
/* NOTE(review): several guards/continues are elided in this listing;
   comments refer to the visible lines only. */
1739 static void hoist_high(ir_node *block, void *ctx)
1741 pre_env *env = (pre_env*)ctx;
1742 block_info *curr_info;
1743 ir_valueset_iterator_t iter;
1746 int arity = get_irn_arity(block);
1748 if (! is_Block(block))
1751 curr_info = get_block_info(block);
/* Start with a fresh new_set; it will collect nodes hoisted by this pass. */
1753 if (curr_info->new_set)
1754 ir_valueset_del(curr_info->new_set);
1755 curr_info->new_set = ir_valueset_new(16);
1757 if (block == env->start_block)
1763 DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1765 /* foreach entry optimized by insert node phase */
1766 foreach_valueset(curr_info->antic_done, value, expr, iter) {
1769 /* TODO currently we cannot handle load and their projections */
1770 if (is_memop(expr) || is_Proj(expr))
1773 DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
1775 /* visit hoisted expressions */
1776 for (pos = 0; pos < arity; ++pos) {
1777 /* standard target is predecessor block */
1778 ir_node *target = get_Block_cfgpred_block(block, pos);
1779 block_info *pred_info = get_block_info(target);
1781 ir_node *new_target;
1782 ir_node *trans_expr;
1783 ir_node *trans_value;
1787 unsigned nest_depth;
1788 block_info *dom_info;
1790 /* get phi translated value */
1791 trans_expr = get_translated(target, expr);
1792 trans_value = identify(trans_expr);
1793 avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1795 /* get the used expr on this path */
1797 /* TODO when does this happen? */
1801 avail_arity = get_irn_arity(avail);
1802 value = identify(avail);
1804 /* anticipation border */
1806 nest_depth = get_loop_depth(get_irn_loop(target));
1808 /* Either push the hoisted nodes up their path,
1809 or try to put them directly into their common dominator. */
1811 /* By using block (instead of target) as initial block,
1812 we only allow hoisting into a common block of
1813 both predecessor blocks. */
/* Walk up the dominator tree looking for the highest block that still
   anticipates the value and in which all operands of avail die. */
1819 while (dom && dom != get_Block_idom(block)) {
1821 dom = get_Block_idom(dom);
1822 dom_info = get_block_info(dom);
1823 DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
1825 /* TODO Being in antic_in means hoistable above block,
1826 but we need 'hoistable into block'.
1827 This could be achieved by a flag for each valueset pair,
1828 being set during antic computation. */
1830 /* check if available node is still anticipated and clean */
1831 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1832 DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
/* NOTE(review): nest_depth is refreshed to dom's own loop depth here,
   which would make the comparison on the next visible line always
   false for this dom — verify intent against the elided lines. */
1836 nest_depth = get_loop_depth(get_irn_loop(dom));
1838 /* do not hoist into loops */
1839 if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
1840 DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
1841 /* not a suitable location */
1845 /* check if operands die */
1847 /* check for uses on current path */
1848 for (i = 0; i < avail_arity; i++) {
1849 ir_node *pred = get_irn_n(avail, i);
1850 ir_node *pred_value = identify(pred);
1855 DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
1857 if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1858 DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
1863 /* check every successor */
1864 foreach_out_edge(pred, edge) {
1865 ir_node *succ = get_edge_src_irn(edge);
1866 DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
1868 /* check only successors on current path to end */
1869 if (block_dominates(dom, get_nodes_block(succ))) {
1870 ir_node *succ_value = identify(succ);
1872 /* Do we have another user than avail?
1873 Then predecessor is not dead after removal of avail. */
1874 if (succ_value != value) {
1875 DB((dbg, LEVEL_4, "still used in %+F\n", succ));
1886 /* only try common dominator */
1891 /* put node into new target block */
1893 block_info *target_info = get_block_info(new_target);
1894 int nn_arity = get_irn_arity(avail);
1895 ir_node **in = XMALLOCN(ir_node *, nn_arity);
1899 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1900 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
/* Rebuild the node in the target block using the operand leaders that
   are available there. */
1902 for (i = 0; i < nn_arity; ++i) {
1903 ir_node *pred = get_irn_n(avail, i);
1904 ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
1909 get_irn_dbg_info(avail),
1913 get_irn_mode(avail),
1918 identify_or_remember(nn);
1919 /* TODO Nodes are inserted into a dominating block and should
1920 be available from this point on. Currently we do not push
1921 the availability information through during the walk. */
1922 ir_valueset_insert(target_info->new_set, value, nn);
1923 ir_valueset_insert(target_info->avail_out, value, nn);
1930 /* --------------------------------------------------------
1931 * Elimination of fully redundant nodes
1932 * --------------------------------------------------------
1936 * Walker which finds redundant nodes using avail_out sets
1937 * and exchanges them for existing ones.
1938 * We cannot change the graph here as this would affect
1939 * the hash values of the nodes.
1941 * @param irn the node
1942 * @param ctx the walker environment
/* Records (old_node, new_node) pairs on env->pairs instead of exchanging
   immediately; eliminate_nodes() applies them afterwards. */
1944 static void eliminate(ir_node *irn, void *ctx)
1946 pre_env *env = (pre_env*)ctx;
1948 if (! is_Block(irn)) {
1949 ir_node *block = get_nodes_block(irn);
1950 block_info *info = get_block_info(block);
1951 ir_node *value = identify(irn);
1953 if (value != NULL) {
1954 ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1955 DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
1957 if (expr != NULL && expr != irn) {
1958 elim_pair *p = OALLOC(env->obst, elim_pair);
1962 p->next = env->pairs;
/* Leaders with an index above last_idx were created by this pass, so the
   replacement removes a partial (not a pre-existing full) redundancy. */
1963 if (get_irn_idx(expr) > env->last_idx)
1964 p->reason = FS_OPT_GVN_PARTLY;
1966 p->reason = FS_OPT_GVN_FULLY;
1968 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1975 * Do all the recorded changes and optimize
1976 * newly created Phi's.
1978 * @param pairs list of elimination pairs
/* @param keeps mode_M phis whose keepalive edges should be removed */
1980 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1983 ir_node *end = environ->end_node;
1985 for (p = pairs; p != NULL; p = p->next) {
1986 /* might be already changed */
1987 p->new_node = skip_Id(p->new_node);
1989 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1991 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1992 * which we can optimize here */
1993 if (is_Phi(p->new_node)) {
1995 ir_node *res = NULL;
/* Scan the Phi inputs for the single operand that is not the node being
   replaced; if exactly one exists the Phi collapses to it. */
1997 for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1998 ir_node *pred = get_irn_n(p->new_node, i);
2000 if (pred != p->old_node) {
2009 exchange(p->new_node, res);
2013 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
2015 exchange(p->old_node, p->new_node);
2018 /* remove keep alive edges of unused mode_M phis */
2019 foreach_ir_nodeset(keeps, m_phi, iter) {
2020 remove_End_keepalive(end, m_phi);
2022 } /* eliminate_nodes */
2025 /* --------------------------------------------------------
2027 * --------------------------------------------------------
2031 * Gvn_Pre algorithm.
2033 * @param irg the graph
2034 * @param env the environment
/* Driver for the whole pass: build per-block sets, iterate antic_in and
   insertion to a fixpoint (bounded by MAX_ANTIC_ITER / MAX_INSERT_ITER),
   optionally hoist higher, then eliminate redundant nodes. */
2036 static void gvn_pre(ir_graph *irg, pre_env *env)
2038 unsigned antic_iter;
2039 unsigned insert_iter;
2041 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
2043 /* allocate block info */
2044 irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
2046 ir_nodehashmap_init(&value_map);
2048 /* generate exp_gen */
2049 irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
2050 dump_all_expgen_sets(env->list);
2052 /* compute the avail_out sets for all blocks */
2053 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
2055 /* compute the anticipated value sets for all blocks */
2057 env->first_iter = 1;
2060 /* antic_in passes */
/* Fixpoint iteration: repeat until no set changes, capped at
   MAX_ANTIC_ITER as suggested by the GVN-PRE authors. */
2063 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
2065 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
2066 env->first_iter = 0;
2067 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
2069 } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
2071 DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
2073 ir_nodeset_init(env->keeps);
2075 env->first_iter = 1;
2076 /* compute redundant expressions */
2079 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
2081 /* TODO topologically top down would be better; fewer iterations. */
2082 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
2083 env->first_iter = 0;
2084 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
2085 } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
2086 DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
2089 /* An attempt to reduce lifetimes by hoisting already hoisted values
2090 even higher if their operands die. */
2091 dom_tree_walk_irg(irg, hoist_high, NULL, env);
2092 /* update avail_out for elimination */
2093 dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
2096 /* Deactivate edges to prevent intelligent removal of nodes,
2097 or else we will get deleted nodes which we try to exchange. */
2098 edges_deactivate(environ->graph);
2100 /* eliminate nodes */
2101 irg_walk_graph(irg, NULL, eliminate, env);
2102 eliminate_nodes(env->pairs, env->keeps);
2104 ir_nodeset_destroy(env->keeps);
2108 * Gvn_Pre pass for graph irg.
2110 * @param irg the graph
/* Entry point: establishes graph preconditions, saves/restores optimizer
   state and the identities table around the gvn_pre() driver. */
2112 void do_gvn_pre(ir_graph *irg)
2114 struct obstack obst;
2117 optimization_state_t state;
2118 block_info *block_info;
2120 /* bads and unreachables cause too much trouble with dominance,
2121 loop info for endless loop detection,
2122 no critical edges is PRE precondition
2124 assure_irg_properties(irg,
2125 IR_GRAPH_PROPERTY_NO_BADS
2126 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
2127 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
2128 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
2129 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
2130 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
2132 /* register a debug mask */
2133 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
2135 save_optimization_state(&state);
2136 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2138 edges_activate(irg);
2141 DEBUG_ONLY(init_stats();)
2143 /* setup environment */
2144 obstack_init(&obst);
2148 env.start_block = get_irg_start_block(irg);
2149 env.end_block = get_irg_end_block(irg);
2150 env.end_node = get_irg_end(irg);
/* Remember the highest node index before the pass; nodes above this index
   were created by GVN-PRE itself (used to classify replacements). */
2153 env.last_idx = get_irg_last_idx(irg);
2155 /* Detect and set links of infinite loops to non-zero. */
/* Stash the graph's identities table and install a fresh one using
   GVN-specific identity comparison. */
2159 new_identities(irg);
2160 env.value_table = irg->value_table;
2161 irg->value_table = NULL;
2164 /* Switch on GCSE. We need it to correctly compute
2165 the value of a node, which is independent from
2167 set_opt_global_cse(1);
2168 /* new_identities() */
2169 if (irg->value_table != NULL)
2170 del_pset(irg->value_table);
2171 /* initially assumed nodes in pset are 512 */
2172 irg->value_table = new_pset(compare_gvn_identities, 512);
2174 env.gvnpre_values = irg->value_table;
2177 /* do GVN-PRE pass */
2179 DEBUG_ONLY(print_stats();)
2181 /* clean up: delete all sets */
2182 for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2183 free_block_info(block_info);
2186 DEBUG_ONLY(free_stats();)
2187 ir_nodehashmap_destroy(&value_map);
2188 obstack_free(&obst, NULL);
2189 ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2191 /* Pin the graph again.
2192 This is needed due to the use of set_opt_global_cse(1) */
2193 set_irg_pinned(irg, op_pin_state_pinned);
2194 restore_optimization_state(&state);
2195 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
/* NOTE(review): the saved table is installed, immediately deleted, then
   replaced by the GVN-PRE table — the first assignment looks redundant;
   verify intent against the lines elided from this listing. */
2198 irg->value_table = env.value_table;
2199 del_pset(irg->value_table);
2200 irg->value_table = env.gvnpre_values;
2203 /* TODO There seem to be optimizations that try to use the existing
2205 new_identities(irg);
2207 /* TODO assure nothing else breaks. */
2208 set_opt_global_cse(0);
2209 edges_activate(irg);
2212 /* Creates an ir_graph pass for do_gvn_pre. */
/* @param name pass name, or NULL for the default "gvn_pre" */
2213 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2215 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);