2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
46 #include "irgraph_t.h"
51 /* suggested by GVN-PRE authors */
/* Iteration caps for the antic_in and insert fixed-point phases. */
52 #define MAX_ANTIC_ITER 10
53 #define MAX_INSERT_ITER 3
55 /* Stops antic iteration from processing endless loops. */
56 #define IGNORE_INF_LOOPS 1
57 /* Stops antic information to flow over infinite loop backedge */
58 #define NO_INF_LOOPS 0
60 /* Attempt to reduce register pressure and reduce code size
65 /* Seamless implementation of handling loads and generally memory
66 dependent nodes with GVN-PRE. */
72 /* Only optimize node directly after phi node if node is only successor. */
75 /* GVN is very limited. This enables optimize_node during value recognition.
76 GVN is still very limited, since a+1+b+1 doesn't equal a+b+2.
77 TODO Broken for yet unknown reasons. */
78 #define OPTIMIZE_NODES 0
83 /** Additional info we need for every block. */
84 typedef struct block_info {
/* All value sets map a GVN value node to one expression node representing it. */
85 ir_valueset_t *exp_gen; /* contains this block's clean expressions */
86 ir_valueset_t *avail_out; /* available values at block end */
87 ir_valueset_t *antic_in; /* clean anticipated values at block entry */
88 ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase */
89 ir_valueset_t *new_set; /* new by hoisting made available values */
90 ir_nodehashmap_t *trans; /* contains translated nodes translated into block */
91 ir_node *avail; /* saves available node for insert node phase */
92 int found; /* saves kind of availability for insert_node phase */
93 ir_node *block; /* block of the block_info */
94 struct block_info *next; /* links all instances for easy access */
98 * A pair of nodes to be exchanged.
99 * We have to defer the exchange because there are still needed references
/* Deferred exchange record, collected during analysis and applied afterwards. */
102 typedef struct elim_pair {
103 ir_node *old_node; /* node that will be replaced */
104 ir_node *new_node; /* replacement for old_node */
105 struct elim_pair *next; /* links all instances for easy access */
106 int reason; /* reason for the replacement (presumably used for debug statistics) — TODO confirm */
109 /** environment for the GVN-PRE algorithm */
110 typedef struct pre_env {
111 ir_graph *graph; /* current graph */
112 struct obstack *obst; /* obstack to allocate on */
113 ir_node *start_block; /* start block of the current graph */
114 ir_node *end_block; /* end block of the current graph */
115 ir_node *end_node; /* end node of the current graph */
116 block_info *list; /* block_info list head */
117 elim_pair *pairs; /* elim_pair list head */
118 ir_nodeset_t *keeps; /* a list of to be removed phis to kill their keep alive edges */
119 unsigned last_idx; /* last node index of input graph */
120 char changes; /* flag for fixed point iterations - non-zero if changes occurred */
121 char first_iter; /* non-zero for first fixed point iteration */
122 int iteration; /* iteration counter */
/* Two node value tables: the graph's value_table is temporarily switched
   between these (see remember()/topo_walker()) so optimize_node() uses the
   standard comparator while GVN-PRE uses its own. */
124 pset *value_table; /* standard value table*/
125 pset *gvnpre_values; /* gvnpre value table */
/* Environment of the currently running GVN-PRE pass (file-global for walkers). */
129 static pre_env *environ;
131 /* custom GVN value map: maps expression nodes to their value/leader nodes */
132 static ir_nodehashmap_t value_map;
134 /* debug module handle */
135 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
139 /* --------------------------------------------------------
141 * --------------------------------------------------------
/* Per-pass counters, only collected/printed in statistics-enabled builds. */
144 typedef struct gvnpre_statistics {
151 int first_iter_found;
152 int antic_iterations;
153 int insert_iterations;
157 gvnpre_statistics *gvnpre_stats = NULL;
/* Allocates and zero-initializes the global statistics record. */
159 static void init_stats()
161 gvnpre_stats = XMALLOCZ(gvnpre_statistics);
/* Releases the global statistics record. */
164 static void free_stats()
/* Dumps all counters through the debug module at LEVEL_1. */
170 static void print_stats()
172 gvnpre_statistics *stats = gvnpre_stats;
173 DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
174 DB((dbg, LEVEL_1, "antic_in iterations : %d\n", stats->antic_iterations));
175 DB((dbg, LEVEL_1, "insert iterations : %d\n", stats->insert_iterations));
176 DB((dbg, LEVEL_1, "infinite loops : %d\n", stats->infinite_loops));
177 DB((dbg, LEVEL_1, "fully redundant : %d\n", stats->fully));
178 DB((dbg, LEVEL_1, "partially redundant : %d\n", stats->partially));
179 DB((dbg, LEVEL_1, " loads : %d\n", stats->loads));
180 DB((dbg, LEVEL_1, " Divs/Mods : %d\n", stats->divmods));
181 DB((dbg, LEVEL_1, " hoist high : %d\n", stats->hoist_high));
182 DB((dbg, LEVEL_1, " first iteration : %d\n", stats->first_iter_found));
/* Counter helpers; presumably defined as no-ops when statistics are disabled — TODO confirm the #else branch. */
185 #define set_stats(var, value) (var)=(value)
186 #define inc_stats(var) ((var)+=1)
188 /* --------------------------------------------------------
190 * --------------------------------------------------------
196 * @param set the set to dump
197 * @param txt a text to describe the set
198 * @param block the owner block of the set
200 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
202 ir_valueset_iterator_t iter;
203 ir_node *value, *expr;
206 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
208 foreach_valueset(set, value, expr, iter) {
210 DB((dbg, LEVEL_2, "\n"));
/* NOTE(review): two print forms — presumably the short form is used when expr == value; the selecting condition is not visible here. */
212 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
214 DB((dbg, LEVEL_2, " %+F,", expr));
217 DB((dbg, LEVEL_2, "\n}\n"));
218 } /* dump_value_set */
221 * Dump all exp_gen value sets.
223 * @param list the list of block infos to retrieve the sets from
225 static void dump_all_expgen_sets(block_info *list)
227 block_info *block_info;
229 for (block_info = list; block_info != NULL; block_info = block_info->next) {
230 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
/* Non-debug builds: dumping becomes a no-op. */
236 #define dump_all_expgen_sets(list)
237 #define dump_value_set(set, txt, block)
239 #endif /* DEBUG_libfirm */
241 /* --------------------------------------------------------
243 * --------------------------------------------------------
247 * Compares node collisions in valuetable.
248 * Modified identities_cmp().
 * pset comparator convention: returns 0 when elt and key represent the
 * same GVN value, non-zero otherwise.
250 static int compare_gvn_identities(const void *elt, const void *key)
252 ir_node *a = (ir_node *)elt;
253 ir_node *b = (ir_node *)key;
256 if (a == b) return 0;
258 /* phi nodes kill predecessor values and are always different */
259 if (is_Phi(a) || is_Phi(b))
262 /* memops are not the same, even if we want to optimize them
263 we have to take the order in account */
264 if (is_memop(a) || is_memop(b) ||
265 get_irn_mode(a) == mode_T ||
266 get_irn_mode(b) == mode_T) {
267 /* Loads with the same predecessors are the same value;
268 this should only happen after phi translation. */
269 if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
273 if ((get_irn_op(a) != get_irn_op(b)) ||
274 (get_irn_mode(a) != get_irn_mode(b))) return 1;
276 /* compare if a's in and b's in are of equal length */
277 irn_arity_a = get_irn_arity(a);
278 if (irn_arity_a != get_irn_arity(b))
281 /* blocks are never the same */
282 if (is_Block(a) || is_Block(b))
285 /* should only be used with gcse enabled */
286 assert(get_opt_global_cse());
288 /* compare a->in[0..ins] with b->in[0..ins] */
289 for (i = 0; i < irn_arity_a; ++i) {
290 ir_node *pred_a = get_irn_n(a, i);
291 ir_node *pred_b = get_irn_n(b, i);
292 if (pred_a != pred_b) {
293 if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
299 * here, we already know that the nodes are identical except their
302 if (a->op->ops.node_cmp_attr)
303 return a->op->ops.node_cmp_attr(a, b);
309 * Identify does a lookup in the GVN valuetable.
310 * To be used when no new GVN values are to be created.
312 * @param irn a node representing an expression
313 * @return a node representing the value
315 static ir_node *identify(ir_node *irn)
317 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
320 /* irn represents a new value, so return the leader */
321 return identify_remember(irn);
325 * remember() adds node irn to the GVN valuetable.
326 * Identify_remember only identifies values of nodes with the
327 * same predecessor nodes (not values). By creating a node from the predecessor
328 * values/leaders, a true valuetree is built. Phis kill their predecessor value,
329 * so no circular dependencies need to be resolved.
332 * Maybe this could be implemented with a custom node hash that takes
333 * phi nodes and true values (instead of predecessors) into account,
334 * resulting in value numbers.
335 * TODO This unnecessarily also handles nodes like calls, which are never equal.
337 * @param irn a node representing an expression
338 * @return the value of the expression
340 static ir_node *remember(ir_node *irn)
342 int arity = get_irn_arity(irn);
345 ir_node **in = XMALLOCN(ir_node *, arity);
348 for (i = 0; i < arity; ++i) {
349 ir_node *pred = get_irn_n(irn, i);
350 /* value and leader at the same time */
351 ir_node *pred_value = identify(pred);
353 /* phi will be translated anyway, so kill the predecessor values
354 this also prevents circular dependencies */
356 /* every phi represents its own value */
361 /* predecessor is not its value representation/the leader */
362 if (pred != pred_value)
367 if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
368 /* create representative for the value, built from the leader predecessors */
369 ir_node *nn = new_ir_node(
370 get_irn_dbg_info(irn),
372 get_nodes_block(irn),
377 copy_node_attr(environ->graph, irn, nn);
380 /* TODO optimize_node() uses the valuetable and thus the custom
381 identify_cmp() and will fail trying. */
/* Temporarily switch to the standard value table so optimize_node() sees
   its expected comparator, then switch back to the GVN-PRE table. */
382 environ->graph->value_table = environ->value_table;
383 nn = optimize_node(nn);
384 environ->graph->value_table = environ->gvnpre_values;
387 /* now the value can be determined because the
388 predecessors are the leaders */
389 value = identify_remember(nn);
391 value = identify_remember(irn);
395 DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
396 ir_nodehashmap_insert(&value_map, irn, value);
402 * When the value map has been built we may lookup expressions
403 * and remember them if new.
 * @param irn a node representing an expression
 * @return the (possibly newly created) value of the expression
405 static ir_node *identify_or_remember(ir_node *irn)
407 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
411 return remember(irn);
414 /* --------------------------------------------------------
416 * --------------------------------------------------------
420 * Allocate block info for block block.
422 * @param block the block
423 * @param env the environment
425 static void alloc_block_info(ir_node *block, pre_env *env)
427 block_info *info = OALLOC(env->obst, block_info);
/* the info is retrievable from the block via its link field (see get_block_info) */
429 set_irn_link(block, info);
430 info->exp_gen = ir_valueset_new(16);
431 info->avail_out = ir_valueset_new(16);
432 info->antic_in = ir_valueset_new(16);
433 info->antic_done = ir_valueset_new(16);
434 info->trans = XMALLOC(ir_nodehashmap_t);
435 ir_nodehashmap_init(info->trans);
437 info->new_set = NULL;
/* prepend to the global list of block infos for later traversal/cleanup */
442 info->next = env->list;
444 } /* alloc_block_info */
/* Frees the sets and maps owned by a block_info.
   NOTE(review): antic_done is allocated in alloc_block_info but its deletion
   is not visible here — confirm it is freed (possible leak otherwise). */
446 static void free_block_info(block_info *block_info)
448 ir_valueset_del(block_info->exp_gen);
449 ir_valueset_del(block_info->avail_out);
450 ir_valueset_del(block_info->antic_in);
451 if (block_info->trans) {
452 ir_nodehashmap_destroy(block_info->trans);
453 free(block_info->trans);
455 if (block_info->new_set)
456 ir_valueset_del(block_info->new_set);
460 * Bottom up walker that ensures that every block gets a block info.
462 * @param irn the node
463 * @param ctx the environment
465 static void block_info_walker(ir_node *irn, void *ctx)
/* NOTE(review): non-Block nodes are presumably filtered out just before this point — confirm. */
468 pre_env *env = (pre_env*)ctx;
469 alloc_block_info(irn, env);
474 * Returns the block info of a block.
 * @param block the block whose info (stored in the link field) is returned
476 static block_info *get_block_info(ir_node *block)
478 return (block_info*)get_irn_link(block);
481 /* --------------------------------------------------------
482 * Infinite loop analysis
483 * --------------------------------------------------------
487 * Walker to set block marks and loop links to 0.
489 static void clear_block_mark_loop_link(ir_node *block, void *env)
/* resets both the reachability mark and the infinite-loop flag (loop link) */
493 if (is_Block(block)) {
494 set_Block_mark(block, 0);
495 set_loop_link(get_irn_loop(block), NULL);
500 * Returns non-zero if block is part of real loop loop.
503 static unsigned in_loop(ir_node *block, ir_loop *loop)
505 ir_loop *l = get_irn_loop(block);
506 ir_loop *outer = get_irg_loop(environ->graph);
/* walk the loop tree upward from the block's innermost loop, looking for loop */
509 /* loop tree root is not a loop */
510 if (l == NULL || l == outer)
512 l = get_loop_outer_loop(l);
518 * Returns the outermost real loop of loop.
 * ("Real" excludes the loop-tree root, which represents the whole graph.)
520 static ir_loop *get_loop_outermost(ir_loop *loop)
522 ir_loop *outer = get_irg_loop(environ->graph);
524 ir_loop *last = NULL;
528 l = get_loop_outer_loop(l);
534 * Topologic bottom-up walker sets links of infinite loops to non-zero.
535 * Block marks are used to flag blocks reachable (from end) on the one hand,
536 * on the other hand they are set if the block is not part of an infinite loop.
538 static void infinite_loop_walker(ir_node *block, void *env)
544 if (! is_Block(block))
547 /* start block has no predecessors */
548 if (block == environ->start_block)
551 arity = get_irn_arity(block);
553 /* block not part of a real loop: no infinite loop */
554 if (get_irn_loop(block) == get_irg_loop(environ->graph))
555 set_Block_mark(block, 1);
557 if (get_Block_mark(block)) {
558 /* reachable block: mark all cf predecessors */
559 for (i = 0; i < arity; ++i) {
560 ir_node *pred = get_Block_cfgpred_block(block, i);
563 set_Block_mark(pred, 1);
566 /* We are in a real loop and see an unreachable block. */
567 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
569 /* flag loop as infinite (loop link points to the loop itself) */
570 set_loop_link(outermost_loop, outermost_loop);
571 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
573 /* The cf predecessors are unreachable, but can never be part of
574 an infinite loop, because we just reached them. So we set the
575 blockmark to prevent triggering the infinite loop detection. */
577 /* passing information to the cf predecessors */
578 for (i = 0; i < arity; ++i) {
579 ir_node *pred = get_Block_cfgpred_block(block, i);
584 /* If our cf predecessor is in the same endless loop,
585 it is also unreachable. */
586 if (in_loop(pred, outermost_loop)) {
587 set_Block_mark(pred, 0);
589 /* When we leave the unreachable loop, we artificially
590 declare the cf predecessor reachable. */
591 set_Block_mark(pred, 1);
598 * Sets loop links of outermost infinite loops to non-zero.
 * Requires the BLOCK_MARK resource, which is reserved/released here.
600 static void analyse_loops(ir_graph *irg)
602 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
604 /* reset block mark and loop links */
605 irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
607 /* mark end block reachable */
608 set_Block_mark(get_irg_end_block(irg), 1);
609 irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
611 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
614 #if IGNORE_INF_LOOPS || NO_INF_LOOPS
616 * Returns non-zero if block is part of an infinite loop.
618 static unsigned is_in_infinite_loop(ir_node *block)
622 assert(is_Block(block));
623 loop = get_irn_loop(block);
626 loop = get_loop_outermost(loop);
/* infinite loops were flagged by infinite_loop_walker via set_loop_link() */
628 return (get_loop_link(loop) != NULL);
634 /* --------------------------------------------------------
636 * --------------------------------------------------------
640 * Returns non-zero if a node is movable and a possible candidate for PRE.
642 static unsigned is_nice_value(ir_node *n)
644 ir_mode *mode = get_irn_mode(n);
649 #if LOADS || OLD_DIVMODS || DIVMODS
650 if (is_Proj(n) && mode != mode_X && mode != mode_T)
/* volatile memory operations must never be moved */
659 return get_Load_volatility(n) == volatility_non_volatile;
661 return get_Store_volatility(n) == volatility_non_volatile;
/* pinned nodes are tied to their block and are not movable */
664 if (get_irn_pinned(n) == op_pin_state_pinned)
667 if (! mode_is_data(mode)) {
668 if (! is_Div(n) && ! is_Mod(n))
675 * Checks if a node n is clean in block block for exp_gen.
 * @param n the node to check
678 * @param block the block
 * @param valueset the value set the predecessor values must be found in
679 * @return non-zero value for clean node
681 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
688 if (! is_nice_value(n))
692 /* filter loads with no phi predecessor from antic_in */
693 if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
695 if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
701 ir_node *mem = get_Div_mem(n);
705 if (! is_Phi(mem) && ! is_NoMem(mem))
709 if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
/* all same-block predecessors must already have a value in valueset */
713 arity = get_irn_arity(n);
714 for (i = 0; i < arity; ++i) {
715 ir_node *pred = get_irn_n(n, i);
721 /* we only handle current block */
722 if (get_nodes_block(pred) != block)
725 if (! is_nice_value(pred))
728 value = identify(pred);
729 if (! ir_valueset_lookup(valueset, value))
737 * Topological walker puts nodes in top-down topological order into exp_gen set.
738 * Assumed to walk blockwise and nodewise topologically top-down.
740 * @param irn the node
741 * @param ctx the environment
743 static void topo_walker(ir_node *irn, void *ctx)
/* also feed the node into the standard value table (comparator switch,
   cf. remember()) before computing its GVN value */
754 environ->graph->value_table = environ->value_table;
755 identify_remember(irn);
756 environ->graph->value_table = environ->gvnpre_values;
759 /* GVN step: remember the value. */
760 value = remember(irn);
762 if (is_irn_constlike(irn))
765 block = get_nodes_block(irn);
766 info = get_block_info(block);
768 if (get_irn_mode(irn) != mode_X)
769 ir_valueset_insert(info->avail_out, value, irn);
771 /* values that are not in antic_in also don't need to be in any other set */
773 if (! is_nice_value(irn))
776 if (is_clean_in_block(irn, block, info->exp_gen)) {
777 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
779 ir_valueset_insert(info->exp_gen, value, irn);
783 /* --------------------------------------------------------
785 * --------------------------------------------------------
789 * Gets result of nodes phi translation into block.
791 * @param node the node
792 * @param block the target block
794 * @return a phi translation of node node into block block or NULL
796 static ir_node *get_translated(ir_node *block, ir_node *node)
/* constlike nodes need no translation (presumably returned as-is) — TODO confirm */
798 if (is_irn_constlike(node))
801 return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
805 * Saves result of phi translation of node into predecessor
806 * at pos of block succ.
 * NOTE(review): the parameter list below (succ/pos) does not match the
 * actual signature, which takes the translation map directly — update docs.
808 * @param node the node
809 * @param succ the successor of the translation target block
810 * @param pos the position of the predecessor block
811 * @param trans the translation result
814 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
/* constlike nodes are never recorded; they are their own translation */
816 if (is_irn_constlike(node))
818 /* insert or replace */
819 ir_nodehashmap_insert(map, node, trans)
823 /* Helper function to compare the values of pred and avail_pred. */
/* Returns non-zero iff pred, translated into the predecessor block at pos,
   has the same GVN value as avail_pred. */
824 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
826 ir_node *avail_value = identify(avail_pred);
827 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
828 ir_node *trans_pred = get_translated(pred_block, pred);
/* NOTE(review): the fallback taken when no translation exists is not visible here — confirm */
831 if (trans_pred == NULL)
833 value = identify(trans_pred);
835 DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));
837 return (value == avail_value);
841 * Does phi translation for redundant Div/Mod nodes only.
842 * Returns NULL for non-redundant node, which needs to be phi translated.
 * @param divmod the Div or Mod node to translate
 * @param block the block whose predecessor at pos is the translation target
 * @param pos the predecessor position
844 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
846 ir_node *mem = get_memop_mem(divmod);
847 ir_node *trans = get_translated_pred(block, pos, mem);
852 /* no partial redundancy if this is a mode_M phi */
853 if (is_Proj(trans)) {
854 /* The last memory operation in predecessor block */
855 ir_node *avail_op = get_Proj_pred(trans);
/* only an operation of the same kind can make divmod redundant */
857 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
858 unsigned left, right;
860 if (is_Div(avail_op)) {
861 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
862 get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
/* compare both operands by value across the phi translation */
864 left = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
865 right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
870 } else if (is_Mod(avail_op)) {
871 if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
873 left = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
874 right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
887 * Translates an expression above a Phi.
889 * @param node the node
890 * @param block the block the node is translated into
891 * @param pos the input number of the destination block
 * @param leaderset value set (anti-leaders) used to look up the
 *        representative of each predecessor value
893 * @return a node representing the translated value
895 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
900 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
/* a Phi in the target block translates to its operand at pos */
905 if (get_nodes_block(node) == block)
906 return get_Phi_pred(node, pos);
907 /* this phi does not need translation */
910 arity = get_irn_arity(node);
913 if (is_Div(node) || is_Mod(node)) {
914 ir_node *avail_op = phi_translate_divmod(node, block, pos);
921 in = ALLOCANZ(ir_node *, arity);
923 /* A value has several representatives. The anti leader is chosen to be
924 the main representative. If we access a node as representative of a
925 value we always use the anti leader. The anti leader can be found by
926 antic_in(identify(node)). */
927 for (i = 0; i < arity; ++i) {
928 ir_node *pred = get_irn_n(node, i);
929 ir_node *value = identify(pred);
930 /* get leader for pred to lookup its translated value */
931 ir_node *leader = ir_valueset_lookup(leaderset, value);
938 /* we cannot find this value in antic_in, because the value
939 has (possibly) changed! */
940 pred_trans = get_translated(pred_block, leader);
945 ir_node *mem = get_Div_mem(node);
950 pred_trans = get_Div_mem(node);
954 DB((dbg, LEVEL_3, "trans %+F of %+F is %+F\n", leader, pred_block, pred_trans));
955 if (pred_trans == NULL) {
958 new_pred = pred_trans;
960 /* loads: Predecessor is a memory phi, which translated yields a proj or
961 another phi. In case of projection and a load predecessor,
962 skip them and use the loads memory. */
963 if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
965 ir_node *loadstore = get_Proj_pred(pred_trans);
966 /* If we do not translate this node, we will get its value wrong. */
969 if (is_Load(loadstore)) {
970 /* Put new load under the adjacent loads memory edge
971 such that GVN may compare them. */
972 new_pred = get_Load_mem(loadstore);
973 } else if (is_Store(loadstore)) {
974 new_pred = get_Store_mem(loadstore);
978 /* predecessor value changed, so translation is needed */
979 if (identify(new_pred) != identify(pred))
984 DB((dbg, LEVEL_4, "in %+F\n", new_pred));
991 DB((dbg, LEVEL_3, "Translate\n"));
994 pred_block = get_nodes_block(in[0]);
996 /* copy node to represent the new value.
997 We do not translate nodes that do not need translation,
998 so we use the newly created nodes as value representatives only.
999 Their block is not important, because we create new ones during
1000 the insert node phase. */
1002 get_irn_dbg_info(node),
1009 /* We need the attribute copy here, because the Hash value of a
1010 node might depend on it. */
1011 copy_node_attr(environ->graph, node, nn);
1012 /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
1013 because it already uses availability. */
1015 DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
1020 * Block-walker, computes Antic_in(block).
1021 * Builds a value tree out of the graph by translating values
1024 * @param block the block
1025 * @param ctx the walker environment
1027 static void compute_antic(ir_node *block, void *ctx)
1029 pre_env *env = (pre_env*)ctx;
1030 block_info *succ_info;
1036 ir_valueset_iterator_t iter;
1039 /* filter blocks from topological walker */
1040 if (! is_Block(block))
1043 /* the end block has no successor */
1044 if (block == env->end_block)
1047 info = get_block_info(block);
/* remember old size to detect changes for the fixed-point iteration */
1049 size = ir_valueset_size(info->antic_in);
1050 n_succ = get_Block_n_cfg_outs(block);
1053 if (env->first_iter) {
1054 #if IGNORE_INF_LOOPS
1055 /* keep antic_in of infinite loops empty */
1056 if (! is_in_infinite_loop(block)) {
1057 foreach_valueset(info->exp_gen, value, expr, iter) {
1058 ir_valueset_insert(info->antic_in, value, expr);
1062 foreach_valueset(info->exp_gen, value, expr, iter) {
1063 ir_valueset_insert(info->antic_in, value, expr);
1068 /* successor might have phi nodes */
1069 if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1070 succ = get_Block_cfg_out(block, 0);
1071 int pos = get_Block_cfgpred_pos(succ, block);
1072 succ_info = get_block_info(succ);
1074 /* initialize translated set */
1075 if (env->first_iter) {
1076 info->trans = XMALLOC(ir_nodehashmap_t);
1077 ir_nodehashmap_init(info->trans);
1080 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1081 ir_node *trans = get_translated(block, expr);
1082 ir_node *trans_value;
1086 trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
1088 /* create new value if necessary */
1089 trans_value = identify_or_remember(trans);
1091 DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1093 /* On value change (phi present) we need the translated node
1094 to represent the new value for possible further translation. */
1095 if (value != trans_value)
1100 if (is_clean_in_block(expr, block, info->antic_in)) {
1102 /* Prevent information flow over the backedge of endless loops. */
/* NOTE(review): this condition admits backedges of non-infinite loops and
   looks inverted relative to the comment above — confirm against the
   NO_INF_LOOPS variant (guard lines not visible in this excerpt). */
1103 if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
1104 ir_valueset_replace(info->antic_in, trans_value, represent);
1107 ir_valueset_replace(info->antic_in, trans_value, represent);
1110 set_translated(info->trans, expr, represent);
1113 } else if (n_succ > 1) {
1115 ir_node *common = NULL;
1116 ir_node *succ0 = get_Block_cfg_out(block, 0);
1117 block_info *succ0_info = get_block_info(succ0);
1119 /* intersection of the successors' antic_in sets */
1120 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1121 /* iterate over remaining successors */
1122 for (i = 1; i < n_succ; ++i) {
1123 ir_node *succ = get_Block_cfg_out(block, i);
1124 block_info *succ_info = get_block_info(succ);
1126 /* value in antic_in? */
1127 common = ir_valueset_lookup(succ_info->antic_in, value);
1132 if (common && is_clean_in_block(expr, block, info->antic_in))
1133 ir_valueset_replace(info->antic_in, value, expr);
1138 DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
/* any size change means another fixed-point iteration is needed */
1140 if (size != ir_valueset_size(info->antic_in))
1144 /* --------------------------------------------------------
1145 * Main algorithm Avail_out
1146 * --------------------------------------------------------
1150 * Computes Avail_out(block):
1152 * Avail_in(block) = Avail_out(dom(block))
1153 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1156 * This function must be called in the top-down topological order:
1157 * Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1159 * @param block the block
1160 * @param ctx walker context
1162 static void compute_avail_top_down(ir_node *block, void *ctx)
1164 pre_env *env = (pre_env*)ctx;
/* the end block is not part of the program and carries no availability */
1167 if (block == env->end_block)
1170 info = get_block_info(block);
1172 /* Add all nodes from the immediate dominator.
1173 This ensures that avail_out contains the leader. */
1174 if (block != env->start_block) {
1175 ir_node *dom_block = get_Block_idom(block);
1176 block_info *dom_info = get_block_info(dom_block);
1179 ir_valueset_iterator_t iter;
1181 foreach_valueset(dom_info->avail_out, value, expr, iter)
1182 /* replace: use the leader from dominator, not local exp_gen */
1183 ir_valueset_replace(info->avail_out, value, expr);
1186 DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1189 /* --------------------------------------------------------
1190 * Main algorithm redundancy detection
1191 * --------------------------------------------------------
1195 * Returns a valid mode if the value of expr is a partially redundant value.
1197 * @param block the block
1198 * @param expr the expression
 * @param value the GVN value of expr
1200 * @return mode of the expression if it is partially redundant else NULL
1202 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1204 ir_node *first_avail = NULL;
1206 int arity = get_irn_arity(block);
1207 int fully_redundant = 1;
1208 int partially_redundant = 0;
1209 ir_mode *mode = NULL;
1211 DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1213 /* for each predecessor blocks */
1214 for (pos = 0; pos < arity; ++pos) {
1215 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1216 block_info *pred_info;
1217 ir_node *trans_expr;
1218 ir_node *trans_value;
1219 ir_node *avail_expr;
1221 pred_info = get_block_info(pred_block);
1222 trans_expr = get_translated(pred_block, expr);
1223 trans_value = identify(trans_expr);
1225 if (is_Const(trans_expr))
1226 avail_expr = trans_expr;
1228 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1230 /* value might be available through a not yet existing constant */
/* NOTE(review): given line 1226 assigns avail_expr = trans_expr for
   constants, this branch appears unreachable — confirm (dead code, or
   behavior depends on lines not visible in this excerpt). */
1231 if (avail_expr == NULL && is_Const(trans_expr)) {
1232 /* limit range of new constants */
1233 ir_mode *cmode = get_irn_mode(trans_expr);
1234 ir_tarval *upper = new_tarval_from_long(127, cmode);
1235 ir_tarval *lower = new_tarval_from_long(-127, cmode);
1236 ir_tarval *c = get_Const_tarval(trans_expr);
1238 /* tarval within range? */
1239 if (tarval_cmp(lower, c) == ir_relation_less_equal &&
1240 tarval_cmp(c, upper) == ir_relation_less_equal) {
1241 avail_expr = trans_expr;
1247 DB((dbg, LEVEL_3, "avail_expr %+F trans_expr %+F\n", avail_expr, trans_expr));
1249 if (avail_expr == NULL) {
1250 pred_info->avail = trans_expr;
1251 pred_info->found = 0;
1252 fully_redundant = 0;
1254 /* expr is available, use the leader */
1255 pred_info->avail = avail_expr;
1256 pred_info->found = 1;
1257 mode = get_irn_mode(avail_expr);
1258 partially_redundant = 1;
1260 if (first_avail == NULL)
1261 first_avail = avail_expr;
1262 else if (first_avail != avail_expr)
1263 /* Multiple different expressions are available,
1264 This is why we need no cut over avail_out sets. */
1265 fully_redundant = 0;
1267 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1271 /* If it is not the same value already existing along every predecessor
1272 and it is defined by some predecessor then it is partially redundant. */
1273 if (! partially_redundant || fully_redundant)
1279 * Updates the new_set of a block by adding the new_set of
1280 * the immediate dominating block.
 * @param block the block to update
 * @param idom its immediate dominator
1284 static void update_new_set(ir_node *block, ir_node *idom)
1288 ir_valueset_iterator_t iter;
1289 block_info *curr_info = get_block_info(block);
1290 block_info *idom_info = get_block_info(idom);
1293 DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1294 foreach_valueset(idom_info->new_set, value, expr, iter) {
1295 /* inherit new_set from immediate dominator */
1296 ir_valueset_insert(curr_info->new_set, value, expr);
1297 /* replace in avail_out */
1298 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1300 #ifdef DEBUG_libfirm
1302 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1304 } /* update_new_set */
1307 * Checks if hoisting irn is greedy.
1308 * Greedy hoisting means that there are non partially redundant nodes
1309 * hoisted. This happens if a partially redundant node has
1310 * non redundant predecessors.
1312 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1314 int block_arity = get_irn_arity(block);
1315 int arity = get_irn_arity(irn);
1317 block_info *info = get_block_info(block);
1319 /* As long as the predecessor values are available in all predecessor blocks,
1320 we can hoist this value. */
1321 for (pos = 0; pos < block_arity; ++pos) {
1322 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1323 block_info *pred_info = get_block_info(pred_block);
1325 for (i = 0; i < arity; ++i) {
1326 ir_node *pred = get_irn_n(irn, i);
1334 /* Very conservative min cut. Phi might only have 1 user. */
1335 if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
1339 if (is_Phi(pred) && get_nodes_block(pred) == block)
1342 DB((dbg, LEVEL_3, "pred %+F\n", pred));
1343 value = identify(pred);
1344 leader = ir_valueset_lookup(info->antic_in, value);
1347 DB((dbg, LEVEL_3, "lead %+F\n", leader));
1348 trans = get_translated(pred_block, leader);
1351 DB((dbg, LEVEL_3, "trans %+F\n", trans));
1353 trans_val = identify(trans);
1354 DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1356 if (is_Const(trans_val) || is_SymConst(trans_val)) {
1357 /* existing constant */
1358 if (get_irn_idx(trans_val) < environ->last_idx) {
1361 /* limit range of new constants */
/* NOTE(review): bounds here are +/-128 with strict ir_relation_less, while
   is_partially_redundant uses +/-127 with less_equal — confirm whether the
   two constant-range checks are meant to agree. */
1362 ir_mode *cmode = get_irn_mode(trans);
1363 ir_tarval *upper = new_tarval_from_long(128, cmode);
1364 ir_tarval *lower = new_tarval_from_long(-128, cmode);
1365 ir_tarval *c = get_Const_tarval(trans);
1367 /* tarval within range? */
1368 if (tarval_cmp(lower, c) == ir_relation_less &&
1369 tarval_cmp(c, upper) == ir_relation_less) {
1378 if (is_irn_constlike(trans_val))
1381 avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1383 DB((dbg, LEVEL_3, "avail %+F\n", avail));
1387 /* only optimize if predecessors have been optimized */
1388 if (ir_valueset_lookup(info->antic_done, value) == NULL)
1397 * Perform insertion of partially redundant values.
1398 * For every block node, do the following:
1399 * 1. Propagate the NEW_SETS of the dominator into the current block.
1400 * If the block has multiple predecessors,
1401 * 2a. Iterate over the ANTIC expressions for the block to see if
1402 * any of them are partially redundant.
1403 * 2b. If so, insert them into the necessary predecessors to make
1404 * the expression fully redundant.
1405 * 2c. Insert a new Phi merging the values of the predecessors.
1406 * 2d. Insert the new Phi, and the new expressions, into the
1409 * @param block the block
1410 * @param ctx the walker environment
/* Dominator-tree walker implementing the GVN-PRE insert phase: for
   each join block, find partially redundant anticipated values and
   make them fully redundant by inserting copies into the predecessor
   blocks and a merging Phi into the block itself. */
1412 static void insert_nodes_walker(ir_node *block, void *ctx)
1414 pre_env *env = (pre_env*)ctx;
1415 int arity = get_irn_arity(block);
1421 ir_valueset_iterator_t iter;
1424 if (! is_Block(block))
1427 /* ensure that even the start block has a new_set */
1428 info = get_block_info(block);
1430 ir_valueset_del(info->new_set);
1431 info->new_set = ir_valueset_new(16);
1433 if (block == env->start_block)
1436 DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1438 idom = get_Block_idom(block);
1439 update_new_set(block, idom);
1441 /* process only path joining blocks */
1446 /* This is the main reason antic_in is preferred over antic_out;
1447 we may iterate over every anticipated value first and not
1448 over the predecessor blocks. */
1449 foreach_valueset(info->antic_in, value, expr, iter) {
/* skip values already handled in an earlier insert iteration */
1455 if (ir_valueset_lookup(info->antic_done, value))
1458 /* filter phi nodes from antic_in */
1462 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1464 /* A value computed in the dominator is totally redundant.
1465 Hence we have nothing to insert. */
1466 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1467 DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1468 DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1470 ir_valueset_insert(info->antic_done, value, expr);
/* do not hoist if that would drag along non-redundant operands */
1474 if (is_hoisting_greedy(expr, block)) {
1475 DB((dbg, LEVEL_2, "greedy\n"));
/* NOTE(review): appears to yield the expression's mode when expr is
   partially redundant here (used for the Phi below) -- see
   is_partially_redundant() */
1479 mode = is_partially_redundant(block, expr, value);
1483 #if LOADS || OLD_DIVMODS || DIVMODS
1484 /* save old mode_M phis to remove keepalive edges later */
1485 if (is_memop(expr)) {
1486 ir_node *mem = get_memop_mem(expr);
1487 if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1488 ir_nodeset_insert(env->keeps, mem);
1493 #ifdef DEBUG_libfirm
1494 if (! is_Proj(expr)) {
1495 if (env->first_iter)
1496 inc_stats(gvnpre_stats->first_iter_found);
1497 inc_stats(gvnpre_stats->partially);
1499 if (is_Load(expr) || is_Store(expr))
1500 inc_stats(gvnpre_stats->loads);
1501 else if (is_Div(expr) || is_Mod(expr))
1502 inc_stats(gvnpre_stats->divmods);
/* collect one Phi operand per control-flow predecessor */
1505 phi_in = XMALLOCN(ir_node *, arity);
1507 /* for each predecessor block */
1508 for (pos = 0; pos < arity; ++pos) {
1509 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1510 block_info *pred_info;
1512 pred_info = get_block_info(pred_block);
/* value not available in this predecessor: materialize a copy there */
1514 if (! pred_info->found) {
1516 int node_arity = get_irn_arity(expr);
1517 ir_node **in = XMALLOCNZ(ir_node *, node_arity);
1519 ir_node *new_value, *new_value2;
1520 ir_node *target_block = pred_block;
1522 for (i = 0; i < node_arity; ++i) {
1523 ir_node *pred = get_irn_n(expr, i);
1524 ir_node *value = identify(pred);
1530 /* transform knowledge over the predecessor from
1531 anti-leader world into leader world. */
1533 DB((dbg, LEVEL_3, "pred %+F\n", pred));
1534 value = identify(pred);
1536 /* get leader for pred to lookup its translated value */
1537 leader = ir_valueset_lookup(info->antic_in, value);
1540 DB((dbg, LEVEL_3, "lead %+F\n", leader));
1542 trans = get_translated(pred_block, leader);
1545 DB((dbg, LEVEL_3, "trans %+F\n", trans));
1547 /* in case of phi, we are done */
1548 if (is_Phi(pred) && get_nodes_block(pred) == block) {
1553 trans_val = identify(trans);
1554 DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1556 /* constants are always available but not in avail set */
1557 if (is_irn_constlike(trans_val)) {
1563 In case of loads we need to make sure the hoisted
1564 loads are found despite their unique value. */
1565 avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1566 DB((dbg, LEVEL_3, "avail %+F\n", avail));
1568 assert(avail && "predecessor has to be available");
1573 target_block = get_nodes_block(in[0]);
1575 /* Copy node to represent the new value.
1576 We use translated nodes as value representatives only.
1577 They have anti leaders as predecessors, not leaders!
1578 So we have to create a new node using leaders. */
1579 trans = new_ir_node(
1580 get_irn_dbg_info(expr),
1585 get_irn_arity(expr),
1588 /* We need the attribute copy here, because the Hash value of a
1589 node might depend on it. */
1590 copy_node_attr(environ->graph, expr, trans);
1592 /* value is now available in target block through trans
1593 insert (not replace) because it has not been available */
1594 new_value = identify_or_remember(trans);
1595 ir_valueset_insert(pred_info->avail_out, new_value, trans);
1596 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
/* also register under the value of the phi-translated original so
   later lookups through either value find the copy */
1598 new_value2 = identify(get_translated(pred_block, expr));
1599 ir_valueset_insert(pred_info->avail_out, new_value2, trans);
1600 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
1602 DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
1604 phi_in[pos] = trans;
1606 /* value available */
1607 phi_in[pos] = pred_info->avail;
1609 DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1612 /* We do not connect tuples as they will be connected automatically
1613 by the corresponding projections. */
1614 if (get_irn_mode(expr) != mode_T) {
1616 phi = new_r_Phi(block, arity, phi_in, mode);
1617 DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1619 /* This value is now available through the new phi.
1620 insert || replace in avail_out */
1621 ir_valueset_replace(info->avail_out, value, phi);
1622 ir_valueset_insert(info->new_set, value, phi);
1626 /* already optimized this value in this block */
1627 ir_valueset_insert(info->antic_done, value, expr);
/**
 * Block walker: propagates new_set information from the immediate
 * dominator into every block except the start block, refreshing the
 * avail_out sets for the subsequent elimination phase.
 */
1633 static void update_new_set_walker(ir_node *block, void *ctx)
1635 pre_env *env = (pre_env*)ctx;
1637 if (! is_Block(block))
1639 if (block == env->start_block)
1642 update_new_set(block, get_Block_idom(block))
1646 * Domtree block walker to insert nodes with dying operands
1647 * into the highest possible block in which they are still anticipated.
/* Tries to move expressions that the insert phase already hoisted
   even higher up the dominator tree, when all their operands are
   available there and die at the original location -- this shortens
   operand lifetimes. Only the common dominator of the join block's
   predecessors is considered a target. */
1649 static void hoist_high(ir_node *block, void *ctx)
1651 pre_env *env = (pre_env*)ctx;
1652 block_info *curr_info;
1653 ir_valueset_iterator_t iter;
1656 int arity = get_irn_arity(block);
1658 if (! is_Block(block))
1661 curr_info = get_block_info(block);
/* start with a fresh new_set for this walk */
1663 if (curr_info->new_set)
1664 ir_valueset_del(curr_info->new_set);
1665 curr_info->new_set = ir_valueset_new(16);
1667 if (block == env->start_block)
1673 DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1675 /* foreach entry optimized by insert node phase */
1676 foreach_valueset(curr_info->antic_done, value, expr, iter) {
1679 /* TODO currently we cannot handle load and their projections */
1680 if (is_memop(expr) || is_Proj(expr))
1683 DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
1685 /* visit hoisted expressions */
1686 for (pos = 0; pos < arity; ++pos) {
1687 /* standard target is predecessor block */
1688 ir_node *target = get_Block_cfgpred_block(block, pos);
1689 block_info *pred_info = get_block_info(target);
1691 ir_node *new_target;
1692 ir_node *trans_expr;
1693 ir_node *trans_value;
1697 unsigned nest_depth;
1698 block_info *dom_info;
1700 /* get phi translated value */
1701 trans_expr = get_translated(target, expr);
1702 trans_value = identify(trans_expr);
1703 avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1705 /* get the used expr on this path */
1707 /* TODO when does this happen? */
1711 avail_arity = get_irn_arity(avail);
1712 value = identify(avail);
1714 /* anticipation border */
1716 nest_depth = get_loop_depth(get_irn_loop(target));
1718 /* Either push the hoisted nodes up their path,
1719 or try to put them directly into their common dominator. */
1721 /* By using block (instead of target) as initial block,
1722 we only allow hoisting into a common block of
1723 both predecessor blocks. */
/* climb the dominator tree; stop at the join block's idom */
1729 while (dom && dom != get_Block_idom(block)) {
1731 dom = get_Block_idom(dom);
1732 dom_info = get_block_info(dom);
1733 DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
1735 /* TODO Being in antic_in means hoistable above block,
1736 but we need 'hoistable into block'.
1737 This could be achieved by a flag for each valueset pair,
1738 being set during antic computation. */
1740 /* check if available node is still anticipated and clean */
1741 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1742 DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
1746 nest_depth = get_loop_depth(get_irn_loop(dom));
1748 /* do not hoist into loops */
1749 if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
1750 DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
1751 /* not a suitable location */
1755 /* check if operands die */
1757 /* check for uses on current path */
1758 for (i = 0; i < avail_arity; i++) {
1759 ir_node *pred = get_irn_n(avail, i);
1760 ir_node *pred_value = identify(pred);
1765 DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
1767 if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1768 DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
1773 /* check every successor */
1774 foreach_out_edge(pred, edge) {
1775 ir_node *succ = get_edge_src_irn(edge);
1776 DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
1778 /* check only successors on current path to end */
1779 if (block_dominates(dom, get_nodes_block(succ))) {
1780 ir_node *succ_value = identify(succ);
1782 /* Do we have another user than avail?
1783 Then predecessor is not dead after removal of avail. */
1784 if (succ_value != value) {
1785 DB((dbg, LEVEL_4, "still used in %+F\n", succ));
1796 /* only try common dominator */
1801 /* put node into new target block */
1803 block_info *target_info = get_block_info(new_target);
1804 int nn_arity = get_irn_arity(avail);
1805 ir_node **in = XMALLOCN(ir_node *, nn_arity);
1809 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1810 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
/* rebuild the node from the leaders available in the target block */
1812 for (i = 0; i < nn_arity; ++i) {
1813 ir_node *pred = get_irn_n(avail, i);
1814 ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
1819 get_irn_dbg_info(avail),
1823 get_irn_mode(avail),
1828 identify_or_remember(nn);
1829 /* TODO Nodes are inserted into a dominating block and should
1830 be available from this point on. Currently we do not push
1831 the availability information through during the walk. */
1832 ir_valueset_insert(target_info->new_set, value, nn);
1833 ir_valueset_insert(target_info->avail_out, value, nn);
1840 /* --------------------------------------------------------
1841 * Elimination of fully redundant nodes
1842 * --------------------------------------------------------
1846 * Walker which finds redundant nodes using avail_out sets
1847 * and exchanges them for existing ones.
1848 * We cannot change the graph here as this would affect
1849 * the hash values of the nodes.
1851 * @param irn the node
1852 * @param ctx the walker environment
/* Walker: records (old_node, leader) elimination pairs; the graph is
   not changed here -- see eliminate_nodes() for the actual exchange. */
1854 static void eliminate(ir_node *irn, void *ctx)
1856 pre_env *env = (pre_env*)ctx;
1858 if (! is_Block(irn)) {
1859 ir_node *block = get_nodes_block(irn);
1860 block_info *info = get_block_info(block);
1861 ir_node *value = identify(irn);
1863 if (value != NULL) {
1864 ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1865 DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
/* a different leader computes the same value: remember the pair */
1867 if (expr != NULL && expr != irn) {
1868 elim_pair *p = OALLOC(env->obst, elim_pair);
1872 p->next = env->pairs;
/* leaders created by this pass have indices above last_idx, so
   classify as a partial vs. full redundancy for statistics */
1873 if (get_irn_idx(expr) > env->last_idx)
1874 p->reason = FS_OPT_GVN_PARTLY;
1876 p->reason = FS_OPT_GVN_FULLY;
1878 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1885 * Do all the recorded changes and optimize
1886 * newly created Phi's.
1888 * @param pairs list of elimination pairs
/**
 * Performs all recorded exchanges, simplifying freshly created
 * Phi(self, ..., x, ..., self) nodes on the way, and removes the
 * keep-alive edges of the collected unused mode_M Phis.
 *
 * @param pairs  linked list of recorded elimination pairs
 * @param keeps  set of mode_M Phis whose End keep-alives are removed
 */
1890 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1893 ir_node *end = environ->end_node;
1895 for (p = pairs; p != NULL; p = p->next) {
1896 /* might be already changed */
1897 p->new_node = skip_Id(p->new_node);
1899 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1901 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1902 * which we can optimize here */
1903 if (is_Phi(p->new_node)) {
1905 ir_node *res = NULL;
/* look for the single operand that is not the node being replaced */
1907 for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1908 ir_node *pred = get_irn_n(p->new_node, i);
1910 if (pred != p->old_node) {
1919 exchange(p->new_node, res);
1923 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1925 exchange(p->old_node, p->new_node);
1928 /* remove keep alive edges of unused mode_M phis */
1929 foreach_ir_nodeset(keeps, m_phi, iter) {
1930 remove_End_keepalive(end, m_phi);
1932 } /* eliminate_nodes */
1935 /* --------------------------------------------------------
1937 * --------------------------------------------------------
1941 * Gvn_Pre algorithm.
1943 * @param irg the graph
1944 * @param env the environment
/* Driver for the GVN-PRE phases: exp_gen / avail_out computation,
   fixed-point antic_in iteration, insertion of partially redundant
   values, optional high hoisting, and final elimination. */
1946 static void gvn_pre(ir_graph *irg, pre_env *env)
1948 unsigned antic_iter;
1949 unsigned insert_iter;
1951 DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1953 /* allocate block info */
1954 irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1956 ir_nodehashmap_init(&value_map);
1958 /* generate exp_gen */
1959 irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1960 dump_all_expgen_sets(env->list);
1962 /* compute the avail_out sets for all blocks */
1963 dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1965 /* compute the anticipated value sets for all blocks */
1967 env->first_iter = 1;
1970 /* antic_in passes */
/* iterate to a fixed point; MAX_ANTIC_ITER bounds endless-loop cases */
1973 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1975 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1976 env->first_iter = 0;
1977 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1979 } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1981 DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
/* keeps collects mode_M Phis whose keep-alives are removed later */
1983 ir_nodeset_init(env->keeps);
1985 env->first_iter = 1;
1986 /* compute redundant expressions */
1989 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1991 /* TODO topologically top down would be better; fewer iterations. */
1992 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
1993 env->first_iter = 0;
1994 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1995 } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1996 DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1999 /* An attempt to reduce lifetimes by hoisting already hoisted values
2000 even higher if their operands die. */
2001 dom_tree_walk_irg(irg, hoist_high, NULL, env);
2002 /* update avail_out for elimination */
2003 dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
2006 /* Deactivate edges to prevent intelligent removal of nodes,
2007 or else we will get deleted nodes which we try to exchange. */
2008 edges_deactivate(environ->graph);
2010 /* eliminate nodes */
2011 irg_walk_graph(irg, NULL, eliminate, env);
2012 eliminate_nodes(env->pairs, env->keeps);
2014 ir_nodeset_destroy(env->keeps);
2018 * Gvn_Pre pass for graph irg.
2020 * @param irg the graph
/* Public entry point: sets up graph properties, optimization state
   and the GVN value table, runs gvn_pre(), and restores everything. */
2022 void do_gvn_pre(ir_graph *irg)
2024 struct obstack obst;
2027 optimization_state_t state;
2028 block_info *block_info;
2030 /* bads and unreachables cause too much trouble with dominance,
2031 loop info for endless loop detection,
2032 no critical edges is PRE precondition
2034 assure_irg_properties(irg,
2035 IR_GRAPH_PROPERTY_NO_BADS
2036 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
2037 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
2038 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
2039 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
2040 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
2042 /* register a debug mask */
2043 FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
2045 save_optimization_state(&state);
2046 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2048 edges_activate(irg);
2051 DEBUG_ONLY(init_stats();)
2053 /* setup environment */
2054 obstack_init(&obst);
2058 env.start_block = get_irg_start_block(irg);
2059 env.end_block = get_irg_end_block(irg);
2060 env.end_node = get_irg_end(irg);
/* nodes with higher indices than this were created by the pass */
2063 env.last_idx = get_irg_last_idx(irg);
2065 /* Detect and set links of infinite loops to non-zero. */
/* save the graph's normal value table; GVN uses its own identities */
2069 new_identities(irg);
2070 env.value_table = irg->value_table;
2071 irg->value_table = NULL;
2074 /* Switch on GCSE. We need it to correctly compute
2075 the value of a node, which is independent from
2077 set_opt_global_cse(1);
2078 /* new_identities() */
2079 if (irg->value_table != NULL)
2080 del_pset(irg->value_table);
2081 /* initially assumed nodes in pset are 512 */
2082 irg->value_table = new_pset(compare_gvn_identities, 512);
2084 env.gvnpre_values = irg->value_table;
2087 /* do GVN-PRE pass */
2089 DEBUG_ONLY(print_stats();)
2091 /* clean up: delete all sets */
2092 for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2093 free_block_info(block_info);
2096 DEBUG_ONLY(free_stats();)
2097 ir_nodehashmap_destroy(&value_map);
2098 obstack_free(&obst, NULL);
2099 ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2101 /* Pin the graph again.
2102 This is needed due to the use of set_opt_global_cse(1) */
2103 set_irg_pinned(irg, op_pin_state_pinned);
2104 restore_optimization_state(&state);
2105 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
/* NOTE(review): the saved table is deleted and the GVN table kept as
   irg->value_table here -- confirm this ordering is intentional */
2108 irg->value_table = env.value_table;
2109 del_pset(irg->value_table);
2110 irg->value_table = env.gvnpre_values;
2113 /* TODO There seem to be optimizations that try to use the existing
2115 new_identities(irg);
2117 /* TODO assure nothing else breaks. */
2118 set_opt_global_cse(0);
2119 edges_activate(irg);
2122 /* Creates an ir_graph pass for do_gvn_pre. */
2123 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2125 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);