2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
46 #include "irgraph_t.h"
51 /* suggested by GVN-PRE authors */
52 #define MAX_ANTIC_ITER 10
53 #define MAX_INSERT_ITER 3
55 /* Infinite loops will be unrolled during antic iteration and
56 will iterate until otherwise stopped.
57 This also leaves every possible value of iteration variables in antic_in.
59 #define NO_INF_LOOPS 0
61 /* Attempt to reduce register pressure and reduce code size
66 /* Seamless implementation of handling loads and generally memory
67 dependent nodes with GVN-PRE. */
/* NOTE(review): extract omits some original lines between the comments and
   defines above; kept verbatim. */
75 #define NO_INF_LOOPS2 0
77 /* NIY (not implemented yet): choose to-be-optimized nodes in a more
78 sophisticated way to reduce number of newly introduced phi nodes. */
79 #define BETTER_GREED 0
82 /** Additional info we need for every block. */
83 typedef struct block_info {
84 ir_valueset_t *exp_gen; /* contains this block's clean expressions */
85 ir_valueset_t *avail_out; /* available values at block end */
86 ir_valueset_t *antic_in; /* clean anticipated values at block entry */
87 ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase */
88 ir_valueset_t *new_set; /* new by hoisting made available values */
89 ir_nodehashmap_t *trans; /* contains translated nodes translated into block */
90 ir_node *avail; /* saves available node for insert node phase */
91 int found; /* saves kind of availability for insert_node phase */
92 ir_node *block; /* block of the block_info */
93 struct block_info *next; /* links all instances for easy access */
/* NOTE(review): the closing "} block_info;" is not visible in this extract. */
97 * A pair of nodes to be exchanged.
98 * We have to defer the exchange because there are still needed references
101 typedef struct elim_pair {
102 ir_node *old_node; /* node that will be replaced */
103 ir_node *new_node; /* replacement for old_node */
104 struct elim_pair *next; /* links all instances for easy access */
105 int reason; /* reason for the replacement; used for statistics/debug output */
/* NOTE(review): the closing "} elim_pair;" is not visible in this extract. */
108 /** environment for the GVN-PRE algorithm */
109 typedef struct pre_env {
110 ir_graph *graph; /* current graph */
111 struct obstack *obst; /* obstack to allocate on */
112 ir_node *start_block; /* start block of the current graph */
113 ir_node *end_block; /* end block of the current graph */
114 ir_node *end_node; /* end node of the current graph */
115 block_info *list; /* block_info list head */
116 elim_pair *pairs; /* elim_pair list head */
117 ir_nodeset_t *keeps; /* a list of to be removed phis to kill their keep alive edges */
118 unsigned last_idx; /* last node index of input graph */
119 char changes; /* flag for fixed point iterations - non-zero if changes occurred */
120 char first_iter; /* non-zero for first fixed point iteration */
121 int iteration; /* iteration counter */
/* NOTE(review): the closing "} pre_env;" is not visible in this extract. */
/* Global pointer to the walker environment; used by helpers that get no ctx. */
124 static pre_env *environ;
126 /* custom GVN value map: maps each expression node to its value leader */
127 static ir_nodehashmap_t value_map;
129 /* debug module handle */
130 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
134 /* --------------------------------------------------------
136 * --------------------------------------------------------
/* Counters collected during a GVN-PRE run (debug builds only);
   several counter fields are omitted from this extract. */
139 typedef struct gvnpre_statistics {
146 int first_iter_found;
147 int antic_iterations;
148 int insert_iterations;
152 gvnpre_statistics *gvnpre_stats = NULL;
/* Allocates a zero-initialized statistics record. */
154 static void init_stats()
156 gvnpre_stats = XMALLOCZ(gvnpre_statistics);
/* Releases the statistics record (body not visible in this extract). */
159 static void free_stats()
/* Dumps all collected counters at debug level 1. */
165 static void print_stats()
167 gvnpre_statistics *stats = gvnpre_stats;
168 DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
169 DB((dbg, LEVEL_1, "antic_in iterations : %d\n", stats->antic_iterations));
170 DB((dbg, LEVEL_1, "insert iterations : %d\n", stats->insert_iterations));
171 DB((dbg, LEVEL_1, "infinite loops : %d\n", stats->infinite_loops));
172 DB((dbg, LEVEL_1, "fully redundant : %d\n", stats->fully));
173 DB((dbg, LEVEL_1, "partially redundant : %d\n", stats->partially));
174 DB((dbg, LEVEL_1, " loads : %d\n", stats->loads));
175 DB((dbg, LEVEL_1, " Divs/Mods : %d\n", stats->divmods));
176 DB((dbg, LEVEL_1, " hoist high : %d\n", stats->hoist_high));
177 DB((dbg, LEVEL_1, " first iteration : %d\n", stats->first_iter_found));
/* Statistics accessors; compile away to nothing in non-debug builds
   (the non-debug stubs are not visible in this extract). */
180 #define set_stats(var, value) (var)=(value)
181 #define inc_stats(var) ((var)+=1)
183 /* --------------------------------------------------------
185 * --------------------------------------------------------
/* Dumps a value set at debug level 2 as "txt(block) = { expr(value), ... }". */
191 * @param set the set to dump
192 * @param txt a text to describe the set
193 * @param block the owner block of the set
195 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
197 ir_valueset_iterator_t iter;
198 ir_node *value, *expr;
201 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
203 foreach_valueset(set, value, expr, iter) {
205 DB((dbg, LEVEL_2, "\n"));
/* Print "expr(value)" when they differ, plain "expr" otherwise;
   the distinguishing condition is omitted from this extract. */
207 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
209 DB((dbg, LEVEL_2, " %+F,", expr));
212 DB((dbg, LEVEL_2, "\n}\n"));
213 } /* dump_value_set */
216 * Dump all exp_gen value sets.
218 * @param list the list of block infos to retrieve the sets from
220 static void dump_all_expgen_sets(block_info *list)
222 block_info *block_info;
224 for (block_info = list; block_info != NULL; block_info = block_info->next) {
225 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
/* Non-debug stubs: dumping compiles away entirely. */
231 #define dump_all_expgen_sets(list)
232 #define dump_value_set(set, txt, block)
234 #endif /* DEBUG_libfirm */
236 /* --------------------------------------------------------
238 * --------------------------------------------------------
/* Collision comparator for the GVN value table: returns 0 iff the two
   nodes represent the same value, non-zero otherwise. */
242 * Compares node collisions in valuetable.
243 * Modified identities_cmp().
245 static int compare_gvn_identities(const void *elt, const void *key)
247 ir_node *a = (ir_node *)elt;
248 ir_node *b = (ir_node *)key;
251 if (a == b) return 0;
253 /* phi nodes kill predecessor values and are always different */
254 if (is_Phi(a) || is_Phi(b))
257 /* memops are not the same, even if we want to optimize them
258 we have to take the order into account */
259 if (is_memop(a) || is_memop(b)) {
260 /* Loads with the same predecessors are the same value;
261 this should only happen after phi translation. */
262 if (! is_Load(a) || ! is_Load(b))
/* Different op or different mode: definitely different values. */
266 if ((get_irn_op(a) != get_irn_op(b)) ||
267 (get_irn_mode(a) != get_irn_mode(b))) return 1;
269 /* compare if a's in and b's in are of equal length */
270 irn_arity_a = get_irn_arity(a);
271 if (irn_arity_a != get_irn_arity(b))
274 /* blocks are never the same */
275 if (is_Block(a) || is_Block(b))
278 /* should only be used with gcse enabled */
279 assert(get_opt_global_cse());
281 /* compare a->in[0..ins] with b->in[0..ins] */
282 for (i = 0; i < irn_arity_a; ++i) {
283 ir_node *pred_a = get_irn_n(a, i);
284 ir_node *pred_b = get_irn_n(b, i);
285 if (pred_a != pred_b) {
/* CSE-neutral nodes (e.g. Pins) may differ without changing the value;
   the handling of that case is omitted from this extract. */
286 if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
292 * here, we already know that the nodes are identical except their
295 if (a->op->ops.node_cmp_attr)
296 return a->op->ops.node_cmp_attr(a, b);
302 * Identify does a lookup in the GVN valuetable.
303 * To be used when no new GVN values are to be created.
305 * @param irn a node representing an expression
306 * @return a node representing the value
308 static ir_node *identify(ir_node *irn)
310 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
/* NOTE(review): the "return value" guard for a successful lookup is not
   visible in this extract; presumably it precedes the fallthrough below. */
313 /* irn represents a new value, so return the leader */
314 return identify_remember(irn);
318 * remember() adds node irn to the GVN valuetable.
319 * Identify_remember only identifies values of nodes with the
320 * same predecessor nodes (not values). By creating a node from the predecessor
321 * values/leaders, a true valuetree is built. Phis kill their predecessor value,
322 * so no circular dependencies need to be resolved.
325 * Maybe this could be implemented with a custom node hash that takes
326 * phi nodes and true values (instead of predecessors) into account,
327 * resulting in value numbers.
328 * TODO This unnecessarily also handles nodes like calls, which are never equal.
330 * @param irn a node representing an expression
331 * @return the value of the expression
333 static ir_node *remember(ir_node *irn)
335 int arity = get_irn_arity(irn);
338 ir_node **in = XMALLOCN(ir_node *, arity)
341 for (i = 0; i < arity; ++i) {
342 ir_node *pred = get_irn_n(irn, i);
343 /* value and leader at the same time */
344 ir_node *pred_value = identify(pred);
346 /* phi will be translated anyway, so kill the predecessor values
347 this also prevents circular dependencies */
349 /* every phi represents its own value */
354 /* predecessor is not its value representation/the leader */
355 if (pred != pred_value)
/* Build a shadow node over the predecessor leaders so hashing compares
   values rather than raw predecessors. */
361 /* create representative for */
362 ir_node *nn = new_ir_node(
363 get_irn_dbg_info(irn),
365 get_nodes_block(irn),
370 copy_node_attr(environ->graph, irn, nn);
372 /* now the value can be determined because the
373 predecessors are the leaders */
374 value = identify_remember(nn);
376 value = identify_remember(irn);
380 DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
381 ir_nodehashmap_insert(&value_map, irn, value);
387 * When the value map has been built we may lookup expressions
388 * and remember them if new.
390 static ir_node *identify_or_remember(ir_node *irn)
392 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
/* Cache miss: create and record a new GVN value for irn. */
396 return remember(irn);
399 /* --------------------------------------------------------
401 * --------------------------------------------------------
405 * Allocate block info for block block.
407 * @param block the block
408 * @param env the environment
410 static void alloc_block_info(ir_node *block, pre_env *env)
412 block_info *info = OALLOC(env->obst, block_info);
/* block_info is reachable from the block through its link field */
414 set_irn_link(block, info);
415 info->exp_gen = ir_valueset_new(16);
416 info->avail_out = ir_valueset_new(16);
417 info->antic_in = ir_valueset_new(16);
418 info->antic_done = ir_valueset_new(16);
419 info->trans = XMALLOC(ir_nodehashmap_t);
420 ir_nodehashmap_init(info->trans);
422 info->new_set = NULL;
/* prepend to the env's singly-linked list of all block infos */
427 info->next = env->list;
429 } /* alloc_block_info */
431 static void free_block_info(block_info *block_info)
433 ir_valueset_del(block_info->exp_gen);
434 ir_valueset_del(block_info->avail_out);
435 ir_valueset_del(block_info->antic_in);
/* NOTE(review): antic_done is not freed in the visible lines — possible
   leak or a line missing from this extract; verify against the full file. */
436 if (block_info->trans) {
437 ir_nodehashmap_destroy(block_info->trans);
438 free(block_info->trans);
440 if (block_info->new_set)
441 ir_valueset_del(block_info->new_set);
445 * Bottom up walker that ensures that every block gets a block info.
447 * @param irn the node
448 * @param ctx the environment
450 static void block_info_walker(ir_node *irn, void *ctx)
453 pre_env *env = (pre_env*)ctx;
454 alloc_block_info(irn, env);
459 * Returns the block info of a block.
461 static block_info *get_block_info(ir_node *block)
463 return (block_info*)get_irn_link(block);
466 /* --------------------------------------------------------
467 * Infinite loop analysis
468 * --------------------------------------------------------
472 * Walker to set block marks and loop links to 0.
474 static void clear_block_mark_loop_link(ir_node *block, void *env)
478 if (is_Block(block)) {
479 set_Block_mark(block, 0);
480 set_loop_link(get_irn_loop(block), NULL);
485 * Returns non-zero if block is part of real loop loop.
488 static unsigned in_loop(ir_node *block, ir_loop *loop)
490 ir_loop *l = get_irn_loop(block);
491 ir_loop *outer = get_irg_loop(environ->graph);
/* walk outward through the loop tree looking for the given loop;
   the loop/return statements are partly omitted from this extract */
494 /* loop tree root is not a loop */
495 if (l == NULL || l == outer)
497 l = get_loop_outer_loop(l);
503 * Returns the outermost real loop of loop.
505 static ir_loop *get_loop_outermost(ir_loop *loop)
507 ir_loop *outer = get_irg_loop(environ->graph);
509 ir_loop *last = NULL;
513 l = get_loop_outer_loop(l);
519 * Topological bottom-up walker sets links of infinite loops to non-zero.
520 * Block marks are used to flag blocks reachable (from end) on the one hand,
521 * on the other hand they are set if the block is not part of an infinite loop.
523 static void infinite_loop_walker(ir_node *block, void *env)
529 if (! is_Block(block))
532 /* start block has no predecessors */
533 if (block == environ->start_block)
536 arity = get_irn_arity(block);
538 /* block not part of a real loop: no infinite loop */
539 if (get_irn_loop(block) == get_irg_loop(environ->graph))
540 set_Block_mark(block, 1);
542 if (get_Block_mark(block)) {
543 /* reachable block: mark all cf predecessors */
544 for (i = 0; i < arity; ++i) {
545 ir_node *pred = get_Block_cfgpred_block(block, i);
548 set_Block_mark(pred, 1);
551 /* We are in a real loop and see an unreachable block. */
552 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
554 /* flag loop as infinite */
555 set_loop_link(outermost_loop, outermost_loop);
556 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
558 /* The cf predecessors are unreachable, but can never be part of
559 an infinite loop, because we just reached them. So we set the
560 blockmark to prevent triggering the infinite loop detection. */
562 /* passing information to the cf predecessors */
563 for (i = 0; i < arity; ++i) {
564 ir_node *pred = get_Block_cfgpred_block(block, i);
569 /* If our cf predecessor is in the same endless loop,
570 it is also unreachable. */
571 if (in_loop(pred, outermost_loop)) {
572 set_Block_mark(pred, 0);
574 /* When we leave the unreachable loop, we artificially
575 declare the cf predecessor reachable. */
576 set_Block_mark(pred, 1);
583 * Sets loop links of outermost infinite loops to non-zero.
585 static void analyse_loops(ir_graph *irg)
587 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
589 /* reset block mark and loop links */
590 irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
592 /* mark end block reachable */
593 set_Block_mark(get_irg_end_block(irg), 1);
594 irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
596 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
/* Only compiled in when one of the infinite-loop handling modes is enabled. */
599 #if NO_INF_LOOPS || NO_INF_LOOPS2
601 * Returns non-zero if block is part of an infinite loop.
603 static unsigned is_in_infinite_loop(ir_node *block)
607 assert(is_Block(block));
608 loop = get_irn_loop(block);
611 loop = get_loop_outermost(loop);
/* a non-NULL loop link marks the loop as infinite (set in analyse_loops) */
613 return (get_loop_link(loop) != NULL);
619 /* --------------------------------------------------------
621 * --------------------------------------------------------
625 * Returns non-zero if a node is movable and a possible candidate for PRE.
627 static unsigned is_nice_value(ir_node *n)
629 ir_mode *mode = get_irn_mode(n);
634 #if LOADS || OLD_DIVMODS || DIVMODS
/* with memory-op handling enabled, data Projs are candidates too */
635 if (is_Proj(n) && mode != mode_X && mode != mode_T)
/* volatile loads must never be moved */
644 return get_Load_volatility(n) == volatility_non_volatile;
/* pinned nodes cannot be hoisted out of their block */
647 if (get_irn_pinned(n) == op_pin_state_pinned)
650 if (! mode_is_data(mode)) {
651 if (! is_Div(n) && ! is_Mod(n))
658 * Checks if a node n is clean in block block for exp_gen.
/* "Clean" means n and all its in-block operands are nice values whose
   values are already present in the given valueset. */
661 * @param block the block
662 * @return non-zero value for clean node
664 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
671 if (! is_nice_value(n))
675 /* filter loads with no phi predecessor from antic_in */
676 if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
682 ir_node *mem = get_Div_mem(n);
686 if (! is_Phi(mem) && ! is_NoMem(mem))
690 if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
694 arity = get_irn_arity(n);
695 for (i = 0; i < arity; ++i) {
696 ir_node *pred = get_irn_n(n, i);
702 /* we only handle current block */
703 if (get_nodes_block(pred) != block)
706 if (! is_nice_value(pred))
/* operand's value must already be in the set, otherwise n is not clean */
709 value = identify(pred);
710 if (! ir_valueset_lookup(valueset, value))
718 * Topological walker puts nodes in top-down topological order into exp_gen set.
719 * Assumed to walk blockwise and nodewise topologically top-down.
721 * @param irn the node
722 * @param ctx the environment
724 static void topo_walker(ir_node *irn, void *ctx)
734 /* GVN step: remember the value. */
735 value = remember(irn);
737 /* values that are not in antic_in also don't need to be in any other set */
738 if (! is_nice_value(irn))
/* constants are available everywhere and need no set membership */
741 if (is_irn_constlike(irn))
744 block = get_nodes_block(irn);
745 info = get_block_info(block);
747 ir_valueset_insert(info->avail_out, value, irn);
749 if (is_clean_in_block(irn, block, info->exp_gen)) {
750 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
752 ir_valueset_insert(info->exp_gen, value, irn);
756 /* --------------------------------------------------------
758 * --------------------------------------------------------
762 * Gets result of nodes phi translation into block.
764 * @param node the node
765 * @param block the target block
767 * @return a phi translation of node node into block block or NULL
769 static ir_node *get_translated(ir_node *block, ir_node *node)
/* constants translate to themselves */
771 if (is_irn_constlike(node))
774 return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
778 * Saves result of phi translation of node into predecessor
779 * at pos of block succ.
781 * @param node the node
782 * @param succ the successor of the translation target block
783 * @param pos the position of the predecessor block
784 * @param trans the translation result
787 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
/* constants need no translation entry */
789 if (is_irn_constlike(node))
791 /* insert or replace */
792 ir_nodehashmap_insert(map, node, trans)
796 /* Helper function to compare the values of pred and avail_pred. */
797 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
799 ir_node *avail_value = identify(avail_pred);
800 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
801 ir_node *trans_pred = get_translated(pred_block, pred);
/* untranslated predecessor: compare the value of pred itself;
   the assignment branch is omitted from this extract */
804 if (trans_pred == NULL)
806 value = identify(trans_pred);
808 DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));
810 return (value == avail_value);
814 * Does phi translation for redundant Div/Mod nodes only.
815 * Returns NULL for non-redundant node, which needs to be phi translated.
817 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
819 ir_node *mem = get_memop_mem(divmod);
820 ir_node *trans = get_translated_pred(block, pos, mem);
825 /* no partial redundancy if this is a mode_M phi */
826 if (is_Proj(trans)) {
827 /* The last memory operation in predecessor block */
828 ir_node *avail_op = get_Proj_pred(trans);
/* same op kind: compare attributes and operand values */
830 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
831 unsigned left, right;
833 if (is_Div(avail_op)) {
834 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
835 get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
837 left = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
838 right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
843 } else if (is_Mod(avail_op)) {
844 if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
846 left = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
847 right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
/* NOTE(review): the "return avail_op / return NULL" decision based on
   left/right is not visible in this extract. */
860 * Translates an expression above a Phi.
862 * @param node the node
863 * @param block the block the node is translated into
864 * @param pos the input number of the destination block
866 * @return a node representing the translated value
868 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
873 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
/* a Phi in this block translates simply to its pos-th operand */
878 if (get_nodes_block(node) == block)
879 return get_Phi_pred(node, pos);
880 /* this phi does not need translation */
883 arity = get_irn_arity(node);
/* Div/Mod: try the specialized redundancy check first */
886 if (is_Div(node) || is_Mod(node)) {
887 ir_node *avail_op = phi_translate_divmod(node, block, pos);
894 in = ALLOCANZ(ir_node *, arity);
896 /* A value has several representatives. The anti leader is chosen to be
897 the main representative. If we access a node as representative of a
898 value we always use the anti leader. The anti leader can be found by
899 antic_in(identify(node)). */
900 for (i = 0; i < arity; ++i) {
901 ir_node *pred = get_irn_n(node, i);
902 ir_node *value = identify(pred);
903 /* get leader for pred to lookup its translated value */
904 ir_node *leader = ir_valueset_lookup(leaderset, value);
911 /* we cannot find this value in antic_in, because the value
912 has (possibly) changed! */
913 pred_trans = get_translated(pred_block, leader);
918 ir_node *mem = get_Div_mem(node);
923 pred_trans = get_Div_mem(node);
927 DB((dbg, LEVEL_3, "trans %+F of %+F is %+F\n", leader, pred_block, pred_trans));
928 if (pred_trans == NULL) {
931 new_pred = pred_trans;
933 /* loads: Predecessor is a memory phi, which translated yields a proj or
934 another phi. In case of projection and a load predecessor,
935 skip them and use the loads memory. */
936 if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
938 ir_node *load = get_Proj_pred(pred_trans);
939 /* If we do not translate this node, we will get its value wrong. */
943 /* Put new load under the adjacent loads memory edge
944 such that GVN may compare them. */
945 new_pred = get_Load_mem(load);
949 /* predecessor value changed, so translation is needed */
950 if (identify(new_pred) != identify(pred))
955 DB((dbg, LEVEL_4, "in %+F\n", new_pred));
962 DB((dbg, LEVEL_3, "Translate\n"));
965 pred_block = get_nodes_block(in[0]);
967 /* copy node to represent the new value.
968 We do not translate nodes that do not need translation,
969 so we use the newly created nodes as value representatives only.
970 Their block is not important, because we create new ones during
971 the insert node phase. */
973 get_irn_dbg_info(node),
980 /* We need the attribute copy here, because the Hash value of a
981 node might depend on it. */
982 copy_node_attr(environ->graph, node, nn);
983 /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
984 because it already uses availability. */
986 DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
991 * Block-walker, computes Antic_in(block).
992 * Builds a value tree out of the graph by translating values
995 * @param block the block
996 * @param ctx the walker environment
998 static void compute_antic(ir_node *block, void *ctx)
1000 pre_env *env = (pre_env*)ctx;
1001 block_info *succ_info;
1007 ir_valueset_iterator_t iter;
1010 /* filter blocks from topological walker */
1011 if (! is_Block(block))
1014 /* the end block has no successor */
1015 if (block == env->end_block)
1018 info = get_block_info(block);
/* remember the size to detect changes for the fixed-point iteration */
1020 size = ir_valueset_size(info->antic_in);
1021 n_succ = get_Block_n_cfg_outs(block);
1024 if (env->first_iter) {
1026 /* keep antic_in of infinite loops empty */
1027 if (! is_in_infinite_loop(block)) {
1028 foreach_valueset(info->exp_gen, value, expr, iter) {
1029 ir_valueset_insert(info->antic_in, value, expr);
1033 foreach_valueset(info->exp_gen, value, expr, iter) {
1034 ir_valueset_insert(info->antic_in, value, expr);
/* Case 1: single successor with Phis — phi-translate its antic_in. */
1039 /* successor might have phi nodes */
1040 if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1041 succ = get_Block_cfg_out(block, 0);
1042 int pos = get_Block_cfgpred_pos(succ, block);
1043 succ_info = get_block_info(succ);
1045 /* initialize translated set */
1046 if (env->first_iter) {
1047 info->trans = XMALLOC(ir_nodehashmap_t);
1048 ir_nodehashmap_init(info->trans);
1051 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1052 ir_node *trans = get_translated(block, expr);
1053 ir_node *trans_value;
1057 trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
1058 /* create new value if necessary */
1059 trans_value = identify_or_remember(trans);
1061 DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1063 /* On value change (phi present) we need the translated node
1064 to represent the new value for possible further translation. */
1065 if (value != trans_value)
1070 if (is_clean_in_block(expr, block, info->antic_in)) {
1072 /* no flow over the backedge of endless loops */
1073 if (env->iteration <= 2 || ! (! is_in_infinite_loop(succ) || ! is_backedge(succ, pos))) {
1074 ir_valueset_replace(info->antic_in, trans_value, represent);
1077 ir_valueset_replace(info->antic_in, trans_value, represent);
1080 set_translated(info->trans, expr, represent);
/* Case 2: multiple successors — intersect their antic_in sets by value. */
1083 } else if (n_succ > 1) {
1085 ir_node *common = NULL;
1086 ir_node *succ0 = get_Block_cfg_out(block, 0);
1087 block_info *succ0_info = get_block_info(succ0);
1089 /* disjoint of antic_ins */
1090 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1091 /* iterate over remaining successors */
1092 for (i = 1; i < n_succ; ++i) {
1093 ir_node *succ = get_Block_cfg_out(block, i);
1094 block_info *succ_info = get_block_info(succ);
1096 /* value in antic_in? */
1097 common = ir_valueset_lookup(succ_info->antic_in, value);
1102 if (common && is_clean_in_block(expr, block, info->antic_in))
1103 ir_valueset_replace(info->antic_in, value, expr);
1108 DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
/* any growth of antic_in forces another fixed-point iteration */
1110 if (size != ir_valueset_size(info->antic_in))
1114 /* --------------------------------------------------------
1115 * Main algorithm Avail_out
1116 * --------------------------------------------------------
1120 * Computes Avail_out(block):
1122 * Avail_in(block) = Avail_out(dom(block))
1123 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1126 * This function must be called in the top-down topological order:
1127 * Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1129 * @param block the block
1130 * @param ctx walker context
1132 static void compute_avail_top_down(ir_node *block, void *ctx)
1134 pre_env *env = (pre_env*)ctx;
1137 if (block == env->end_block)
1140 info = get_block_info(block);
1142 /* Add all nodes from the immediate dominator.
1143 This ensures that avail_out contains the leader. */
1144 if (block != env->start_block) {
1145 ir_node *dom_block = get_Block_idom(block);
1146 block_info *dom_info = get_block_info(dom_block);
1149 ir_valueset_iterator_t iter;
1151 foreach_valueset(dom_info->avail_out, value, expr, iter)
1152 /* replace: use the leader from dominator, not local exp_gen */
1153 ir_valueset_replace(info->avail_out, value, expr);
1156 DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1159 /* --------------------------------------------------------
1160 * Main algorithm redundancy detection
1161 * --------------------------------------------------------
1165 * Returns a valid mode if the value of expr is a partially redundant value.
1167 * @param block the block
1168 * @param expr the expression
1170 * @return mode of the expression if it is partially redundant else NULL
1172 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1174 ir_node *first_avail = NULL;
1176 int arity = get_irn_arity(block);
1177 int fully_redundant = 1;
1178 int partially_redundant = 0;
1179 ir_mode *mode = NULL;
1181 DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1183 /* for each predecessor blocks */
1184 for (pos = 0; pos < arity; ++pos) {
1185 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1186 block_info *pred_info;
1187 ir_node *trans_expr;
1188 ir_node *trans_value;
1189 ir_node *avail_expr;
1191 pred_info = get_block_info(pred_block);
1192 trans_expr = get_translated(pred_block, expr);
1193 trans_value = identify(trans_expr);
/* constants are trivially available in every predecessor */
1195 if (is_Const(trans_expr))
1196 avail_expr = trans_expr;
1198 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1200 /* value might be available through a not yet existing constant */
1201 if (avail_expr == NULL && is_Const(trans_expr)) {
1202 /* limit range of new constants */
1203 ir_mode *cmode = get_irn_mode(trans_expr);
1204 ir_tarval *upper = new_tarval_from_long(127, cmode);
1205 ir_tarval *lower = new_tarval_from_long(-127, cmode);
1206 ir_tarval *c = get_Const_tarval(trans_expr);
1208 /* tarval within range? */
1209 if (tarval_cmp(lower, c) == ir_relation_less_equal &&
1210 tarval_cmp(c, upper) == ir_relation_less_equal) {
1211 avail_expr = trans_expr;
1217 DB((dbg, LEVEL_3, "avail_expr %+F trans_expr %+F\n", avail_expr, trans_expr));
/* record per-predecessor availability in the block info for insertion */
1219 if (avail_expr == NULL) {
1220 pred_info->avail = trans_expr;
1221 pred_info->found = 0;
1222 fully_redundant = 0;
1224 /* expr is available, use the leader */
1225 pred_info->avail = avail_expr;
1226 pred_info->found = 1;
1227 mode = get_irn_mode(avail_expr);
1228 partially_redundant = 1;
1230 if (first_avail == NULL)
1231 first_avail = avail_expr;
1232 else if (first_avail != avail_expr)
1233 /* Multiple different expressions are available,
1234 This is why we need no cut over avail_out sets. */
1235 fully_redundant = 0;
1237 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1242 /* value is redundant from last iteration,
1243 but has not been removed from antic_in (is not optimized) */
1244 if (! environ->first_iter && is_redundant(block, expr))
1248 /* If it is not the same value already existing along every predecessor
1249 and it is defined by some predecessor then it is partially redundant. */
1250 if (! partially_redundant || fully_redundant)
1256 * Updates the new_set of a block by adding the new_set of
1257 * the immediate dominating block.
1261 static void update_new_set(ir_node *block, ir_node *idom)
1265 ir_valueset_iterator_t iter;
1266 block_info *curr_info = get_block_info(block);
1267 block_info *idom_info = get_block_info(idom);
1270 DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1271 foreach_valueset(idom_info->new_set, value, expr, iter) {
1272 /* inherit new_set from immediate dominator */
1273 ir_valueset_insert(curr_info->new_set, value, expr);
1274 /* replace in avail_out */
1275 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1277 #ifdef DEBUG_libfirm
/* only dump when something actually changed; the condition line is
   omitted from this extract */
1279 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1281 } /* update_new_set */
1285 * Returns redundant flag of node irn in block block.
1287 static unsigned is_redundant(ir_node *block, ir_node *irn)
/* NOTE(review): function body is not visible in this extract. */
1292 /* TODO Needs to use a flag, because antic_done should only be used
1293 if node is finally processed by insert_nodes. */
1299 * Checks if hoisting irn is greedy.
1300 * Greedy hoisting means that there are non partially redundant nodes
1301 * hoisted. This happens if a partially redundant node has
1302 * non redundant predecessors.
1304 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1306 int block_arity = get_irn_arity(block);
1307 int arity = get_irn_arity(irn);
1309 block_info *info = get_block_info(block);
1311 /* As long as the predecessor values are available in all predecessor blocks,
1312 we can hoist this value. */
1313 for (pos = 0; pos < block_arity; ++pos) {
1314 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1315 block_info *pred_info = get_block_info(pred_block);
1317 for (i = 0; i < arity; ++i) {
1318 ir_node *pred = get_irn_n(irn, i);
1326 /* Very conservative min cut. Phi might only have 1 user. */
1327 if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
1331 if (is_Phi(pred) && get_nodes_block(pred) == block)
1334 DB((dbg, LEVEL_3, "pred %+F\n", pred));
1335 value = identify(pred);
1336 leader = ir_valueset_lookup(info->antic_in, value);
1339 DB((dbg, LEVEL_3, "lead %+F\n", leader));
1340 trans = get_translated(pred_block, leader);
1343 DB((dbg, LEVEL_3, "trans %+F\n", trans));
1345 trans_val = identify(trans);
1346 DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1348 if (is_Const(trans_val) || is_SymConst(trans_val)) {
1349 /* existing constant */
1350 if (get_irn_idx(trans_val) < environ->last_idx) {
1353 /* limit range of new constants */
1354 ir_mode *cmode = get_irn_mode(trans);
1355 ir_tarval *upper = new_tarval_from_long(128, cmode);
1356 ir_tarval *lower = new_tarval_from_long(-128, cmode);
1357 ir_tarval *c = get_Const_tarval(trans);
1359 /* tarval within range? */
1360 if (tarval_cmp(lower, c) == ir_relation_less &&
1361 tarval_cmp(c, upper) == ir_relation_less) {
/* NOTE(review): the strict-bounds here (-128/128, ir_relation_less) differ
   from is_partially_redundant (-127/127, less_equal) — possibly intentional,
   but worth verifying against the full file. */
1370 if (is_irn_constlike(trans_val))
1373 avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1378 /* only optimize if predecessors have been optimized */
1379 if (ir_valueset_lookup(info->antic_done, value) == NULL)
/**
 * Perform insertion of partially redundant values.
 * For every block node, do the following:
 * 1. Propagate the NEW_SETS of the dominator into the current block.
 * If the block has multiple predecessors,
 * 2a. Iterate over the ANTIC expressions for the block to see if
 *     any of them are partially redundant.
 * 2b. If so, insert them into the necessary predecessors to make
 *     the expression fully redundant.
 * 2c. Insert a new Phi merging the values of the predecessors.
 * 2d. Insert the new Phi, and the new expressions, into the
 *     NEW_SETS for this block.
 *
 * @param block the block
 * @param ctx the walker environment
 */
static void insert_nodes_walker(ir_node *block, void *ctx)
	pre_env *env = (pre_env*)ctx;
	int arity = get_irn_arity(block); /* number of incoming control edges */

	ir_valueset_iterator_t iter;

	/* the walker visits every node; only blocks are of interest here */
	if (! is_Block(block))

	/* ensure that even the start block has a new_set */
	info = get_block_info(block);

	ir_valueset_del(info->new_set);
	info->new_set = ir_valueset_new(16);

	/* the start block has no dominator to inherit from */
	if (block == env->start_block)

	DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));

	/* step 1: inherit the immediate dominator's NEW_SET */
	idom = get_Block_idom(block);
	update_new_set(block, idom);

	/* process only path joining blocks */

	/* remember anticipated expressions in inverse topological order
	   for the BETTER_GREED post-pass at the end of this walker */
	stack = plist_new();
	foreach_valueset(info->antic_in, value, expr, iter) {
		/* inverse topologic */
		plist_insert_front(stack, expr);

	/* This is the main reason antic_in is preferred over antic_out;
	   we may iterate over every anticipated value first and not
	   over the predecessor blocks. */
	foreach_valueset(info->antic_in, value, expr, iter) {

		/* skip values already handled for this block */
		if (ir_valueset_lookup(info->antic_done, value))

		/* filter phi nodes from antic_in */

		DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));

		/* A value computed in the dominator is totally redundant.
		   Hence we have nothing to insert. */
		if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
			DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
			DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)

			ir_valueset_insert(info->antic_done, value, expr);

		/* avoid dragging non redundant operands along with expr */
		if (is_hoisting_greedy(expr, block)) {
			DB((dbg, LEVEL_2, "greedy\n"));

		/* step 2a: check partial redundancy; mode is used for the Phi below */
		mode = is_partially_redundant(block, expr, value);

		if (is_hoisting_greedy(expr, block)) {
			DB((dbg, LEVEL_2, "Better greed: greedy\n"));

#if LOADS || OLD_DIVMODS || DIVMODS
		/* save old mode_M phis to remove keepalive edges later */
		if (is_memop(expr)) {
			ir_node *mem = get_memop_mem(expr);
			if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
				ir_nodeset_insert(env->keeps, mem);

#ifdef DEBUG_libfirm
		/* statistics bookkeeping only (debug builds) */
		if (! is_Proj(expr)) {
			if (env->first_iter)
				inc_stats(gvnpre_stats->first_iter_found);
			inc_stats(gvnpre_stats->partially);

			inc_stats(gvnpre_stats->loads);
		else if (is_Div(expr) || is_Mod(expr))
			inc_stats(gvnpre_stats->divmods);

		/* one Phi operand per control predecessor */
		phi_in = XMALLOCN(ir_node *, arity);

		/* for each predecessor block */
		for (pos = 0; pos < arity; ++pos) {
			ir_node *pred_block = get_Block_cfgpred_block(block, pos);
			block_info *pred_info;

			pred_info = get_block_info(pred_block);

			/* step 2b: value missing on this path — materialize it there */
			if (! pred_info->found) {
				int node_arity = get_irn_arity(expr);
				ir_node **in = XMALLOCNZ(ir_node *, node_arity);

				ir_node *new_value, *new_value2;
				ir_node *target_block = pred_block;

				for (i = 0; i < node_arity; ++i) {
					ir_node *pred = get_irn_n(expr, i);
					ir_node *value = identify(pred);

					/* transform knowledge over the predecessor from
					   anti-leader world into leader world. */

					DB((dbg, LEVEL_3, "pred %+F\n", pred));
					value = identify(pred);

					/* get leader for pred to lookup its translated value */
					leader = ir_valueset_lookup(info->antic_in, value);

					DB((dbg, LEVEL_3, "lead %+F\n", leader));

					trans = get_translated(pred_block, leader);

					DB((dbg, LEVEL_3, "trans %+F\n", trans));

					/* in case of phi, we are done */
					if (is_Phi(pred) && get_nodes_block(pred) == block) {

					trans_val = identify(trans);
					DB((dbg, LEVEL_3, "value %+F\n", trans_val));

					/* constants are always available but not in avail set */
					if (is_irn_constlike(trans_val)) {

					/* In case of loads we need to make sure the hoisted
					   loads are found despite their unique value. */
					avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
					DB((dbg, LEVEL_3, "avail %+F\n", avail));

					assert(avail && "predecessor has to be available");

				target_block = get_nodes_block(in[0]);

				/* Copy node to represent the new value.
				   We use translated nodes as value representatives only.
				   They have anti leaders as predecessors, not leaders!
				   So we have to create a new node using leaders. */
				trans = new_ir_node(
					get_irn_dbg_info(expr),
					get_irn_arity(expr),

				/* We need the attribute copy here, because the Hash value of a
				   node might depend on it. */
				copy_node_attr(environ->graph, expr, trans);

				/* value is now available in target block through trans
				   insert (not replace) because it has not been available */
				new_value = identify_or_remember(trans);
				ir_valueset_insert(pred_info->avail_out, new_value, trans);
				DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));

				/* also register the phi-translated value as an alias of trans */
				new_value2 = identify(get_translated(pred_block, expr));
				ir_valueset_insert(pred_info->avail_out, new_value2, trans);
				DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));

				DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));

				phi_in[pos] = trans;

				/* value available */
				phi_in[pos] = pred_info->avail;

			DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));

		/* step 2c: We do not connect tuples as they will be connected
		   automatically by the corresponding projections. */
		if (get_irn_mode(expr) != mode_T) {

			phi = new_r_Phi(block, arity, phi_in, mode);
			DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));

			/* This value is now available through the new phi.
			   insert || replace in avail_out */
			ir_valueset_replace(info->avail_out, value, phi);
			/* step 2d: make the phi visible to dominated blocks */
			ir_valueset_insert(info->new_set, value, phi);

		/* already optimized this value in this block */
		ir_valueset_insert(info->antic_done, value, expr);

	/* BETTER_GREED post-pass (NIY):
	   Better greed first determines which values are redundant
	   and decides then which to take.
	   insert_nodes needs to be split up for that. The cycle could be:
	   for each block: flag redundant nodes,
	   use heuristic to adjust these flags (also consider antic_done).
	   This way we could decide if we should hoist a non redundant node,
	   if all its successors are redundant.
	   Or we might try to minimize the cut along hoisted nodes and their
	   non redundant successors. */

	plist_element_t *it;
	/* iterate in inverse topological order */
	foreach_plist(stack, it) {
		ir_node *irn = (ir_node *)plist_element_get_value(it);
		ir_node *block = get_nodes_block(irn);

		/* does irn only have redundant successors? */

		foreach_out_edge(irn, edge) {
			ir_node *succ = get_edge_src_irn(edge);

			/* if succ and irn are in the same block */
			if (get_nodes_block(succ) == block && is_redundant(block, succ)) {

		flag_redundant(irn, 1);
/**
 * Block walker that propagates the new_set of a block's immediate
 * dominator into the block itself (see update_new_set()).
 *
 * @param block the visited node (only blocks are processed)
 * @param ctx   the walker environment (pre_env)
 */
static void update_new_set_walker(ir_node *block, void *ctx)
	pre_env *env = (pre_env*)ctx;

	/* only interested in block nodes */
	if (! is_Block(block))

	/* the start block has no dominator to inherit from */
	if (block == env->start_block)

	update_new_set(block, get_Block_idom(block));
/**
 * Domtree block walker to insert nodes with dying operands
 * into the highest possible block whilst still being anticipated.
 *
 * @param block the visited node (only blocks are processed)
 * @param ctx   the walker environment (pre_env)
 */
static void hoist_high(ir_node *block, void *ctx)
	pre_env *env = (pre_env*)ctx;
	block_info *curr_info;
	ir_valueset_iterator_t iter;

	int arity = get_irn_arity(block); /* number of incoming control edges */

	if (! is_Block(block))

	curr_info = get_block_info(block);

	/* start with a fresh new_set for this block */
	if (curr_info->new_set)
		ir_valueset_del(curr_info->new_set);
	curr_info->new_set = ir_valueset_new(16);

	if (block == env->start_block)

	DB((dbg, LEVEL_2, "High hoisting %+F\n", block));

	/* foreach entry optimized by insert node phase */
	foreach_valueset(curr_info->antic_done, value, expr, iter) {

		/* TODO currently we cannot handle load and their projections */
		if (is_memop(expr) || is_Proj(expr))

		DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));

		/* visit hoisted expressions */
		for (pos = 0; pos < arity; ++pos) {
			/* standard target is predecessor block */
			ir_node *target = get_Block_cfgpred_block(block, pos);
			block_info *pred_info = get_block_info(target);

			ir_node *new_target;
			ir_node *trans_expr;
			ir_node *trans_value;

			unsigned nest_depth;
			block_info *dom_info;

			/* get phi translated value */
			trans_expr = get_translated(target, expr);
			trans_value = identify(trans_expr);
			avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);

			/* get the used expr on this path */

			/* TODO when does this happen? */

			avail_arity = get_irn_arity(avail);
			value = identify(avail);

			/* anticipation border */

			nest_depth = get_loop_depth(get_irn_loop(target));

			/* Either push the hoisted nodes up their path,
			   or try to put them directly into their common dominator. */

			/* By using block (instead of target) as initial block,
			   we only allow hoisting into a common block of
			   both predecessor blocks. */

			/* climb the dominator tree looking for a higher legal home */
			while (dom && dom != get_Block_idom(block)) {

				dom = get_Block_idom(dom);
				dom_info = get_block_info(dom);
				DB((dbg, LEVEL_4, "testing dom %+F\n", dom));

				/* TODO Being in antic_in means hoistable above block,
				   but we need 'hoistable into block'.
				   This could be achieved by a flag for each valueset pair,
				   being set during antic computation. */

				/* check if available node ist still anticipated and clean */
				if (! ir_valueset_lookup(dom_info->antic_in, value)) {
					DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));

				/* NOTE(review): nest_depth is overwritten with dom's own loop
				   depth right before the comparison below, which makes the
				   "deeper nested" test vacuous as shown here — verify against
				   the full source whether the comparison should use the
				   target's depth instead. */
				nest_depth = get_loop_depth(get_irn_loop(dom));

				/* do not hoist into loops */
				if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
					DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
					/* not a suitable location */

				/* check if operands die */

				/* check for uses on current path */
				for (i = 0; i < avail_arity; i++) {
					ir_node *pred = get_irn_n(avail, i);
					ir_node *pred_value = identify(pred);

					DB((dbg, LEVEL_4, "testing pred %+F\n", pred));

					/* every operand must be available in the candidate block */
					if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
						DB((dbg, LEVEL_4, "pred %+F not available\n", pred));

					/* check every successor */
					foreach_out_edge(pred, edge) {
						ir_node *succ = get_edge_src_irn(edge);
						DB((dbg, LEVEL_4, "testing succ %+F\n", succ));

						/* check only successors on current path to end */
						if (block_dominates(dom, get_nodes_block(succ))) {
							ir_node *succ_value = identify(succ);

							/* Do we have another user than avail?
							   Then predecessor is not dead after removal of avail. */
							if (succ_value != value) {
								DB((dbg, LEVEL_4, "still used in %+F\n", succ));

			/* only try common dominator */

			/* put node into new target block */

				block_info *target_info = get_block_info(new_target);
				int nn_arity = get_irn_arity(avail);
				ir_node **in = XMALLOCN(ir_node *, nn_arity);

				DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
				DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)

				/* rebuild the node from the leaders available in new_target */
				for (i = 0; i < nn_arity; ++i) {
					ir_node *pred = get_irn_n(avail, i);
					ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));

					get_irn_dbg_info(avail),
					get_irn_mode(avail),

				identify_or_remember(nn);
				/* TODO Nodes are inserted into a dominating block and should
				   be available from this point on. Currently we do not push
				   the availability information through during the walk. */
				ir_valueset_insert(target_info->new_set, value, nn);
/* --------------------------------------------------------
 * Elimination of fully redundant nodes
 * -------------------------------------------------------- */

/**
 * Walker which finds redundant nodes using avail_out sets
 * and exchanges them for existing ones.
 * We cannot change the graph here as this would affect
 * the hash values of the nodes.
 *
 * @param irn the node
 * @param ctx the walker environment
 */
static void eliminate(ir_node *irn, void *ctx)
	pre_env *env = (pre_env*)ctx;

	/* block nodes carry no value themselves */
	if (! is_Block(irn)) {
		ir_node *block = get_nodes_block(irn);
		block_info *info = get_block_info(block);
		ir_node *value = identify(irn);

		if (value != NULL) {
			/* the leader available for this value at the end of the block */
			ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
			DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));

			if (expr != NULL && expr != irn) {
				/* record the pair; the actual exchange happens later in
				   eliminate_nodes() once edges are deactivated */
				elim_pair *p = OALLOC(env->obst, elim_pair);

				p->next = env->pairs;
				/* nodes with an index above last_idx were created by this
				   pass, i.e. the redundancy was only partial before */
				if (get_irn_idx(expr) > env->last_idx)
					p->reason = FS_OPT_GVN_PARTLY;
					p->reason = FS_OPT_GVN_FULLY;

				DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
/**
 * Do all the recorded changes and optimize
 * newly created Phi's.
 *
 * @param pairs list of elimination pairs
 * @param keeps set of mode_M phis whose keepalive edges are removed
 */
static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
	ir_node *end = environ->end_node;

	for (p = pairs; p != NULL; p = p->next) {
		/* might be already changed */
		p->new_node = skip_Id(p->new_node);

		DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));

		/* PRE tends to create Phi(self, self, ... , x, self, self, ...)
		 * which we can optimize here */
		if (is_Phi(p->new_node)) {
			ir_node *res = NULL;

			/* look for the single operand that is not the replaced node */
			for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
				ir_node *pred = get_irn_n(p->new_node, i);

				if (pred != p->old_node) {

				/* degenerated Phi: exchange it for its only real operand */
				exchange(p->new_node, res);

		DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);

		exchange(p->old_node, p->new_node);

	/* remove keep alive edges of unused mode_M phis */
	foreach_ir_nodeset(keeps, m_phi, iter) {
		remove_End_keepalive(end, m_phi);
} /* eliminate_nodes */
/* --------------------------------------------------------
 * The GVN-PRE algorithm
 * -------------------------------------------------------- */

/**
 * Gvn_Pre algorithm.
 *
 * @param irg the graph
 * @param env the environment
 */
static void gvn_pre(ir_graph *irg, pre_env *env)
	unsigned antic_iter;
	unsigned insert_iter;

	DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));

	/* allocate block info */
	irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);

	ir_nodehashmap_init(&value_map);

	/* generate exp_gen */
	irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
	dump_all_expgen_sets(env->list);

	/* compute the avail_out sets for all blocks */
	dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);

	/* compute the anticipated value sets for all blocks */

	env->first_iter = 1;

	/* antic_in passes: iterate to a fixpoint, bounded by MAX_ANTIC_ITER */

		DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));

		irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
		env->first_iter = 0;
		DB((dbg, LEVEL_2, "----------------------------------------------\n"));

	} while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);

	DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)

	ir_nodeset_init(env->keeps);

	env->first_iter = 1;
	/* compute redundant expressions (bounded by MAX_INSERT_ITER) */

		DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));

		/* TODO topologically top down would be better; fewer iterations. */
		dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
		env->first_iter = 0;
		DB((dbg, LEVEL_2, "----------------------------------------------\n"));
	} while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
	DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)

	/* An attempt to reduce lifetimes by hoisting already hoisted values
	   even higher if their operands die. */
	dom_tree_walk_irg(irg, hoist_high, NULL, env);
	/* update avail_out for elimination */
	dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);

	/* Deactivate edges to prevent intelligent removal of nodes,
	   or else we will get deleted nodes which we try to exchange. */
	edges_deactivate(environ->graph);

	/* eliminate nodes */
	irg_walk_graph(irg, NULL, eliminate, env);
	eliminate_nodes(env->pairs, env->keeps);

	ir_nodeset_destroy(env->keeps);
/**
 * Gvn_Pre pass for graph irg.
 *
 * @param irg the graph
 */
void do_gvn_pre(ir_graph *irg)
	struct obstack obst;

	optimization_state_t state;
	block_info *block_info;

	/* bads and unreachables cause too much trouble with dominance,
	   loop info for endless loop detection,
	   no critical edges is PRE precondition
	 */
	assure_irg_properties(irg,
		IR_GRAPH_PROPERTY_NO_BADS
		| IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
		| IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
		| IR_GRAPH_PROPERTY_CONSISTENT_OUTS
		| IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
		| IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);

	/* register a debug mask */
	FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");

	save_optimization_state(&state);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);

	edges_activate(irg);

	DEBUG_ONLY(init_stats();)

	/* setup environment */
	obstack_init(&obst);

	env.start_block = get_irg_start_block(irg);
	env.end_block = get_irg_end_block(irg);
	env.end_node = get_irg_end(irg);

	/* highest node index before PRE runs; used to tell newly created
	   nodes from pre-existing ones later on */
	env.last_idx = get_irg_last_idx(irg);

	/* Detect and set links of infinite loops to non-zero. */

	/* Switch on GCSE. We need it to correctly compute
	   the value of a node, which is independent from
	   its block. */
	set_opt_global_cse(1);
	/* new_identities */
	if (irg->value_table != NULL)
		del_pset(irg->value_table);
	/* initially assumed nodes in pset are 512 */
	irg->value_table = new_pset(compare_gvn_identities, 512);

	/* do GVN-PRE pass */

	DEBUG_ONLY(print_stats();)

	/* clean up: delete all sets */
	for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
		free_block_info(block_info);

	DEBUG_ONLY(free_stats();)
	ir_nodehashmap_destroy(&value_map);
	obstack_free(&obst, NULL);
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);

	/* Pin the graph again.
	   This is needed due to the use of set_opt_global_cse(1) */
	set_irg_pinned(irg, op_pin_state_pinned);
	restore_optimization_state(&state);
	confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);

	/* TODO There seem to be optimizations that try to use the existing
	   value_table. */
	new_identities(irg);

	/* TODO assure nothing else breaks. */
	/* NOTE(review): restore_optimization_state() above already restores the
	   saved optimization flags — confirm this explicit reset is needed. */
	set_opt_global_cse(0);
	edges_activate(irg);
/**
 * Creates an ir_graph pass for do_gvn_pre.
 *
 * @param name the pass name, or NULL to use the default name "gvn_pre"
 * @return the newly created ir_graph pass
 */
ir_graph_pass_t *do_gvn_pre_pass(const char *name)
	return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);