2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Global Value Numbering Partial Redundancy Elimination
23 * (VanDrunen Hosking 2004)
24 * @author Michael Beck
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
46 #include "irgraph_t.h"
/* Compile-time configuration switches for the GVN-PRE pass. */
51 /* suggested by GVN-PRE authors */
52 #define MAX_ANTIC_ITER 10
53 #define MAX_INSERT_ITER 3
55 /* Stops antic iteration from processing endless loops. */
56 #define IGNORE_INF_LOOPS 1
57 /* Stops antic information to flow over infinite loop backedge */
58 #define NO_INF_LOOPS 0
60 /* Attempt to reduce register pressure and reduce code size
65 /* Seamless implementation of handling loads and generally memory
66 dependent nodes with GVN-PRE. */
72 /* Only optimize node directly after phi node if node is only successor. */
75 /* GVN is very limited. This enables optimize_node during value recognition.
76 GVN is still very limited, since a+1+b+1 doesn't equal a+b+2.
77 TODO Broken for yet unknown reasons. */
78 #define OPTIMIZE_NODES 0
82 /* NIY Choose to be optimized nodes in a more sophisticated way
83 to reduce number of newly introduced phi nodes. */
84 #define BETTER_GREED 0
87 /** Additional info we need for every block. */
88 typedef struct block_info {
89 ir_valueset_t *exp_gen; /* contains this blocks clean expressions */
90 ir_valueset_t *avail_out; /* available values at block end */
91 ir_valueset_t *antic_in; /* clean anticipated values at block entry */
92 ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase */
93 ir_valueset_t *new_set; /* new by hoisting made available values */
94 ir_nodehashmap_t *trans; /* contains translated nodes translated into block */
95 ir_node *avail; /* saves available node for insert node phase */
96 int found; /* saves kind of availability for insert_node phase */
97 ir_node *block; /* block of the block_info */
98 struct block_info *next; /* links all instances for easy access */
102 * A pair of nodes to be exchanged.
103 * We have to defer the exchange because there are still needed references
106 typedef struct elim_pair {
107 ir_node *old_node; /* node that will be replaced */
108 ir_node *new_node; /* replacement for old_node */
109 struct elim_pair *next; /* links all instances for easy access */
110 int reason; /* reason for the replacement */
113 /** environment for the GVN-PRE algorithm */
114 typedef struct pre_env {
115 ir_graph *graph; /* current graph */
116 struct obstack *obst; /* obstack to allocate on */
117 ir_node *start_block; /* start block of the current graph */
118 ir_node *end_block; /* end block of the current graph */
119 ir_node *end_node; /* end node of the current graph */
120 block_info *list; /* block_info list head */
121 elim_pair *pairs; /* elim_pair list head */
122 ir_nodeset_t *keeps; /* a list of to be removed phis to kill their keep alive edges */
123 unsigned last_idx; /* last node index of input graph */
124 char changes; /* flag for fixed point iterations - non-zero if changes occurred */
125 char first_iter; /* non-zero for first fixed point iteration */
126 int iteration; /* iteration counter */
/* The two tables below are swapped into graph->value_table around calls
   into the generic optimizer (see remember()/topo_walker()). */
128 pset *value_table; /* standard value table*/
129 pset *gvnpre_values; /* gvnpre value table */
/* Global state of the currently running pass instance. */
133 static pre_env *environ;
135 /* custom GVN value map */
136 static ir_nodehashmap_t value_map;
138 /* debug module handle */
139 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
143 /* --------------------------------------------------------
145 * --------------------------------------------------------
/* Counters for optional pass statistics; written via set_stats()/inc_stats(). */
148 typedef struct gvnpre_statistics {
155 int first_iter_found;
156 int antic_iterations;
157 int insert_iterations;
161 gvnpre_statistics *gvnpre_stats = NULL;
163 static void init_stats()
165 gvnpre_stats = XMALLOCZ(gvnpre_statistics);
168 static void free_stats()
/* Dumps all collected counters at debug level 1. */
174 static void print_stats()
176 gvnpre_statistics *stats = gvnpre_stats;
177 DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
178 DB((dbg, LEVEL_1, "antic_in iterations : %d\n", stats->antic_iterations));
179 DB((dbg, LEVEL_1, "insert iterations : %d\n", stats->insert_iterations));
180 DB((dbg, LEVEL_1, "infinite loops : %d\n", stats->infinite_loops));
181 DB((dbg, LEVEL_1, "fully redundant : %d\n", stats->fully));
182 DB((dbg, LEVEL_1, "partially redundant : %d\n", stats->partially));
183 DB((dbg, LEVEL_1, " loads : %d\n", stats->loads));
184 DB((dbg, LEVEL_1, " Divs/Mods : %d\n", stats->divmods));
185 DB((dbg, LEVEL_1, " hoist high : %d\n", stats->hoist_high));
186 DB((dbg, LEVEL_1, " first iteration : %d\n", stats->first_iter_found));
189 #define set_stats(var, value) (var)=(value)
190 #define inc_stats(var) ((var)+=1)
192 /* --------------------------------------------------------
194 * --------------------------------------------------------
200 * @param set the set to dump
201 * @param txt a text to describe the set
202 * @param block the owner block of the set
204 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
206 ir_valueset_iterator_t iter;
207 ir_node *value, *expr;
210 DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
212 foreach_valueset(set, value, expr, iter) {
214 DB((dbg, LEVEL_2, "\n"));
/* print "expr(value)" when expression and value differ, plain "expr" otherwise */
216 DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
218 DB((dbg, LEVEL_2, " %+F,", expr));
221 DB((dbg, LEVEL_2, "\n}\n"));
222 } /* dump_value_set */
225 * Dump all exp_gen value sets.
227 * @param list the list of block infos to retrieve the sets from
229 static void dump_all_expgen_sets(block_info *list)
231 block_info *block_info;
233 for (block_info = list; block_info != NULL; block_info = block_info->next) {
234 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
/* Non-debug builds: the dumping helpers compile to nothing. */
240 #define dump_all_expgen_sets(list)
241 #define dump_value_set(set, txt, block)
243 #endif /* DEBUG_libfirm */
245 /* --------------------------------------------------------
247 * --------------------------------------------------------
251 * Compares node collisions in valuetable.
252 * Modified identities_cmp().
/* Returns 0 when elt and key represent the same value, non-zero otherwise
   (pset comparator convention). */
254 static int compare_gvn_identities(const void *elt, const void *key)
256 ir_node *a = (ir_node *)elt;
257 ir_node *b = (ir_node *)key;
260 if (a == b) return 0;
262 /* phi nodes kill predecessor values and are always different */
263 if (is_Phi(a) || is_Phi(b))
266 /* memops are not the same, even if we want to optimize them
267 we have to take the order in account */
268 if (is_memop(a) || is_memop(b)) {
269 /* Loads with the same predecessors are the same value;
270 this should only happen after phi translation. */
271 if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
275 if ((get_irn_op(a) != get_irn_op(b)) ||
276 (get_irn_mode(a) != get_irn_mode(b))) return 1;
278 /* compare if a's in and b's in are of equal length */
279 irn_arity_a = get_irn_arity(a);
280 if (irn_arity_a != get_irn_arity(b))
283 /* blocks are never the same */
284 if (is_Block(a) || is_Block(b))
287 /* should only be used with gcse enabled */
288 assert(get_opt_global_cse());
290 /* compare a->in[0..ins] with b->in[0..ins] */
291 for (i = 0; i < irn_arity_a; ++i) {
292 ir_node *pred_a = get_irn_n(a, i);
293 ir_node *pred_b = get_irn_n(b, i);
294 if (pred_a != pred_b) {
295 if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
301 * here, we already know that the nodes are identical except their
304 if (a->op->ops.node_cmp_attr)
305 return a->op->ops.node_cmp_attr(a, b);
311 * Identify does a lookup in the GVN valuetable.
312 * To be used when no new GVN values are to be created.
314 * @param irn a node representing an expression
315 * @return a node representing the value
317 static ir_node *identify(ir_node *irn)
319 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
322 /* irn represents a new value, so return the leader */
323 return identify_remember(irn);
327 * remember() adds node irn to the GVN valuetable.
328 * Identify_remember only identifies values of nodes with the
329 * same predecessor nodes (not values). By creating a node from the predecessor
330 * values/leaders, a true valuetree is built. Phis kill their predecessor value,
331 * so no circular dependencies need to be resolved.
334 * Maybe this could be implemented with a custom node hash that takes
335 * phi nodes and true values (instead of predecessors) into account,
336 * resulting in value numbers.
337 * TODO This unnecessarily also handles nodes like calls, which are never equal.
339 * @param irn a node representing an expression
340 * @return the value of the expression
342 static ir_node *remember(ir_node *irn)
344 int arity = get_irn_arity(irn);
347 ir_node **in = XMALLOCN(ir_node *, arity);
/* NOTE(review): in[] is heap-allocated via XMALLOCN — confirm it is freed
   (or its ownership transferred) in the lines not shown here. */
350 for (i = 0; i < arity; ++i) {
351 ir_node *pred = get_irn_n(irn, i);
352 /* value and leader at the same time */
353 ir_node *pred_value = identify(pred);
355 /* phi will be translated anyway, so kill the predecessor values
356 this also prevents circular dependencies */
358 /* every phi represents its own value */
363 /* predecessor is not its value representation/the leader */
364 if (pred != pred_value)
369 if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
370 /* create representative for */
371 ir_node *nn = new_ir_node(
372 get_irn_dbg_info(irn),
374 get_nodes_block(irn),
379 copy_node_attr(environ->graph, irn, nn);
382 /* TODO optimize_node() uses the valuetable and thus the custom
383 identify_cmp() and will fail trying. */
/* Temporarily switch to the standard value table so optimize_node()
   does not see the GVN-specific comparator; restore afterwards. */
384 environ->graph->value_table = environ->value_table;
385 nn = optimize_node(nn);
386 environ->graph->value_table = environ->gvnpre_values;
389 /* now the value can be determined because the
390 predecessors are the leaders */
391 value = identify_remember(nn);
393 value = identify_remember(irn);
397 DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
398 ir_nodehashmap_insert(&value_map, irn, value);
404 * When the value map has been built we may lookup expressions
405 * and remember them if new.
407 static ir_node *identify_or_remember(ir_node *irn)
409 ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
413 return remember(irn);
416 /* --------------------------------------------------------
418 * --------------------------------------------------------
422 * Allocate block info for block block.
424 * @param block the block
425 * @param env the environment
427 static void alloc_block_info(ir_node *block, pre_env *env)
429 block_info *info = OALLOC(env->obst, block_info);
/* info is reachable from the block via its link field */
431 set_irn_link(block, info);
432 info->exp_gen = ir_valueset_new(16);
433 info->avail_out = ir_valueset_new(16);
434 info->antic_in = ir_valueset_new(16);
435 info->antic_done = ir_valueset_new(16);
436 info->trans = XMALLOC(ir_nodehashmap_t);
437 ir_nodehashmap_init(info->trans);
439 info->new_set = NULL;
/* prepend to the env's singly-linked list of block infos */
444 info->next = env->list;
446 } /* alloc_block_info */
448 static void free_block_info(block_info *block_info)
450 ir_valueset_del(block_info->exp_gen);
451 ir_valueset_del(block_info->avail_out);
452 ir_valueset_del(block_info->antic_in);
/* NOTE(review): antic_done is not deleted in the visible lines — confirm
   it is freed elsewhere. */
453 if (block_info->trans) {
454 ir_nodehashmap_destroy(block_info->trans);
455 free(block_info->trans);
457 if (block_info->new_set)
458 ir_valueset_del(block_info->new_set);
462 * Bottom up walker that ensures that every block gets a block info.
464 * @param irn the node
465 * @param ctx the environment
467 static void block_info_walker(ir_node *irn, void *ctx)
470 pre_env *env = (pre_env*)ctx;
471 alloc_block_info(irn, env);
476 * Returns the block info of a block.
478 static block_info *get_block_info(ir_node *block)
480 return (block_info*)get_irn_link(block);
483 /* --------------------------------------------------------
484 * Infinite loop analysis
485 * --------------------------------------------------------
489 * Walker to set block marks and loop links to 0.
491 static void clear_block_mark_loop_link(ir_node *block, void *env)
495 if (is_Block(block)) {
496 set_Block_mark(block, 0);
497 set_loop_link(get_irn_loop(block), NULL);
502 * Returns non-zero if block is part of real loop loop.
505 static unsigned in_loop(ir_node *block, ir_loop *loop)
507 ir_loop *l = get_irn_loop(block);
508 ir_loop *outer = get_irg_loop(environ->graph);
511 /* loop tree root is not a loop */
512 if (l == NULL || l == outer)
514 l = get_loop_outer_loop(l);
520 * Returns the outermost real loop of loop.
522 static ir_loop *get_loop_outermost(ir_loop *loop)
524 ir_loop *outer = get_irg_loop(environ->graph);
526 ir_loop *last = NULL;
/* walk outwards until the loop-tree root; 'last' trails the last real loop */
530 l = get_loop_outer_loop(l);
536 * Topologic bottom-up walker sets links of infinite loops to non-zero.
537 * Block marks are used to flag blocks reachable (from end) on the one hand,
538 * on the other hand they are set if the block is not part of an infinite loop.
540 static void infinite_loop_walker(ir_node *block, void *env)
546 if (! is_Block(block))
549 /* start block has no predecessors */
550 if (block == environ->start_block)
553 arity = get_irn_arity(block);
555 /* block not part of a real loop: no infinite loop */
556 if (get_irn_loop(block) == get_irg_loop(environ->graph))
557 set_Block_mark(block, 1);
559 if (get_Block_mark(block)) {
560 /* reachable block: mark all cf predecessors */
561 for (i = 0; i < arity; ++i) {
562 ir_node *pred = get_Block_cfgpred_block(block, i);
565 set_Block_mark(pred, 1);
568 /* We are in a real loop and see an unreachable block. */
569 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
571 /* flag loop as infinite */
572 set_loop_link(outermost_loop, outermost_loop);
573 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
575 /* The cf predecessors are unreachable, but can never be part of
576 an infinite loop, because we just reached them. So we set the
577 blockmark to prevent triggering the infinite loop detection. */
579 /* passing information to the cf predecessors */
580 for (i = 0; i < arity; ++i) {
581 ir_node *pred = get_Block_cfgpred_block(block, i);
586 /* If our cf predecessor is in the same endless loop,
587 it is also unreachable. */
588 if (in_loop(pred, outermost_loop)) {
589 set_Block_mark(pred, 0);
591 /* When we leave the unreachable loop, we artificially
592 declare the cf predecessor reachable. */
593 set_Block_mark(pred, 1);
600 * Sets loop links of outermost infinite loops to non-zero.
602 static void analyse_loops(ir_graph *irg)
604 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
606 /* reset block mark and loop links */
607 irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
609 /* mark end block reachable */
610 set_Block_mark(get_irg_end_block(irg), 1);
611 irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
613 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
616 #if IGNORE_INF_LOOPS || NO_INF_LOOPS
618 * Returns non-zero if block is part of an infinite loop.
620 static unsigned is_in_infinite_loop(ir_node *block)
624 assert(is_Block(block));
625 loop = get_irn_loop(block);
/* an outermost loop was flagged infinite iff its loop link is non-NULL */
628 loop = get_loop_outermost(loop);
630 return (get_loop_link(loop) != NULL);
636 /* --------------------------------------------------------
638 * --------------------------------------------------------
642 * Returns non-zero if a node is movable and a possible candidate for PRE.
644 static unsigned is_nice_value(ir_node *n)
646 ir_mode *mode = get_irn_mode(n);
651 #if LOADS || OLD_DIVMODS || DIVMODS
652 if (is_Proj(n) && mode != mode_X && mode != mode_T)
/* volatile memory operations must not be moved */
661 return get_Load_volatility(n) == volatility_non_volatile;
663 return get_Store_volatility(n) == volatility_non_volatile;
666 if (get_irn_pinned(n) == op_pin_state_pinned)
669 if (! mode_is_data(mode)) {
670 if (! is_Div(n) && ! is_Mod(n))
677 * Checks if a node n is clean in block block for exp_gen.
680 * @param block the block
681 * @return non-zero value for clean node
683 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
690 if (! is_nice_value(n))
694 /* filter loads with no phi predecessor from antic_in */
695 if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
697 if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
703 ir_node *mem = get_Div_mem(n);
707 if (! is_Phi(mem) && ! is_NoMem(mem))
711 if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
/* a node is clean only if all its same-block predecessors are clean too */
715 arity = get_irn_arity(n);
716 for (i = 0; i < arity; ++i) {
717 ir_node *pred = get_irn_n(n, i);
723 /* we only handle current block */
724 if (get_nodes_block(pred) != block)
727 if (! is_nice_value(pred))
730 value = identify(pred);
731 if (! ir_valueset_lookup(valueset, value))
739 * Topological walker puts nodes in top-down topological order into exp_gen set.
740 * Assumed to walk blockwise and nodewise topologically top-down.
742 * @param irn the node
743 * @param ctx the environment
745 static void topo_walker(ir_node *irn, void *ctx)
/* feed the node through the standard value table, then switch back to
   the GVN-specific one (same table-swap trick as in remember()) */
756 environ->graph->value_table = environ->value_table;
757 identify_remember(irn);
758 environ->graph->value_table = environ->gvnpre_values;
761 /* GVN step: remember the value. */
762 value = remember(irn);
764 if (is_irn_constlike(irn))
767 block = get_nodes_block(irn);
768 info = get_block_info(block);
/* control-flow results (mode_X) are not values */
770 if (get_irn_mode(irn) != mode_X)
771 ir_valueset_insert(info->avail_out, value, irn);
773 /* values that are not in antic_in also don't need to be in any other set */
775 if (! is_nice_value(irn))
778 if (is_clean_in_block(irn, block, info->exp_gen)) {
779 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
781 ir_valueset_insert(info->exp_gen, value, irn);
785 /* --------------------------------------------------------
787 * --------------------------------------------------------
791 * Gets result of nodes phi translation into block.
793 * @param node the node
794 * @param block the target block
796 * @return a phi translation of node node into block block or NULL
798 static ir_node *get_translated(ir_node *block, ir_node *node)
/* constants translate to themselves, so they are never stored in the map */
800 if (is_irn_constlike(node))
803 return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
807 * Saves result of phi translation of node into predecessor
808 * at pos of block succ.
810 * @param map the predecessor block's translation map
811 * @param node the node
813 * @param trans the translation result
816 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
818 if (is_irn_constlike(node))
820 /* insert or replace */
821 ir_nodehashmap_insert(map, node, trans);
825 /* Helper function to compare the values of pred and avail_pred. */
826 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
828 ir_node *avail_value = identify(avail_pred);
829 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
830 ir_node *trans_pred = get_translated(pred_block, pred);
833 if (trans_pred == NULL)
835 value = identify(trans_pred);
837 DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));
839 return (value == avail_value);
843 * Does phi translation for redundant Div/Mod nodes only.
844 * Returns NULL for non-redundant node, which needs to be phi translated.
846 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
848 ir_node *mem = get_memop_mem(divmod);
849 ir_node *trans = get_translated_pred(block, pos, mem);
854 /* no partial redundancy if this is a mode_M phi */
855 if (is_Proj(trans)) {
856 /* The last memory operation in predecessor block */
857 ir_node *avail_op = get_Proj_pred(trans);
859 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
860 unsigned left, right;
862 if (is_Div(avail_op)) {
/* Div is only equal if result mode and remainder flag match as well */
863 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
864 get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
866 left = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
867 right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
872 } else if (is_Mod(avail_op)) {
873 if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
875 left = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
876 right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
889 * Translates an expression above a Phi.
891 * @param node the node
892 * @param block the block the node is translated into
893 * @param pos the input number of the destination block
 * @param leaderset value set used to look up anti-leaders (e.g. antic_in)
895 * @return a node representing the translated value
897 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
902 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
/* a Phi in the target block translates to its operand at pos */
907 if (get_nodes_block(node) == block)
908 return get_Phi_pred(node, pos);
909 /* this phi does not need translation */
912 arity = get_irn_arity(node);
915 if (is_Div(node) || is_Mod(node)) {
916 ir_node *avail_op = phi_translate_divmod(node, block, pos);
923 in = ALLOCANZ(ir_node *, arity);
925 /* A value has several representatives. The anti leader is chosen to be
926 the main representative. If we access a node as representative of a
927 value we always use the anti leader. The anti leader can be found by
928 antic_in(identify(node)). */
929 for (i = 0; i < arity; ++i) {
930 ir_node *pred = get_irn_n(node, i);
931 ir_node *value = identify(pred);
932 /* get leader for pred to lookup its translated value */
933 ir_node *leader = ir_valueset_lookup(leaderset, value);
940 /* we cannot find this value in antic_in, because the value
941 has (possibly) changed! */
942 pred_trans = get_translated(pred_block, leader);
947 ir_node *mem = get_Div_mem(node);
952 pred_trans = get_Div_mem(node);
956 DB((dbg, LEVEL_3, "trans %+F of %+F is %+F\n", leader, pred_block, pred_trans));
957 if (pred_trans == NULL) {
960 new_pred = pred_trans;
962 /* loads: Predecessor is a memory phi, which translated yields a proj or
963 another phi. In case of projection and a load predecessor,
964 skip them and use the loads memory. */
965 if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
967 ir_node *loadstore = get_Proj_pred(pred_trans);
968 /* If we do not translate this node, we will get its value wrong. */
971 if (is_Load(loadstore)) {
972 /* Put new load under the adjacent loads memory edge
973 such that GVN may compare them. */
974 new_pred = get_Load_mem(loadstore);
975 } else if (is_Store(loadstore)) {
976 new_pred = get_Store_mem(loadstore);
980 /* predecessor value changed, so translation is needed */
981 if (identify(new_pred) != identify(pred))
986 DB((dbg, LEVEL_4, "in %+F\n", new_pred));
993 DB((dbg, LEVEL_3, "Translate\n"));
996 pred_block = get_nodes_block(in[0]);
998 /* copy node to represent the new value.
999 We do not translate nodes that do not need translation,
1000 so we use the newly created nodes as value representatives only.
1001 Their block is not important, because we create new ones during
1002 the insert node phase. */
1004 get_irn_dbg_info(node),
1011 /* We need the attribute copy here, because the Hash value of a
1012 node might depend on it. */
1013 copy_node_attr(environ->graph, node, nn);
1014 /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
1015 because it already uses availability. */
1017 DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
1022 * Block-walker, computes Antic_in(block).
1023 * Builds a value tree out of the graph by translating values
1026 * @param block the block
1027 * @param ctx the walker environment
1029 static void compute_antic(ir_node *block, void *ctx)
1031 pre_env *env = (pre_env*)ctx;
1032 block_info *succ_info;
1038 ir_valueset_iterator_t iter;
1041 /* filter blocks from topological walker */
1042 if (! is_Block(block))
1045 /* the end block has no successor */
1046 if (block == env->end_block)
1049 info = get_block_info(block);
/* remember the set size to detect changes for the fixed-point iteration */
1051 size = ir_valueset_size(info->antic_in);
1052 n_succ = get_Block_n_cfg_outs(block);
1055 if (env->first_iter) {
1056 #if IGNORE_INF_LOOPS
1057 /* keep antic_in of infinite loops empty */
1058 if (! is_in_infinite_loop(block)) {
1059 foreach_valueset(info->exp_gen, value, expr, iter) {
1060 ir_valueset_insert(info->antic_in, value, expr);
1064 foreach_valueset(info->exp_gen, value, expr, iter) {
1065 ir_valueset_insert(info->antic_in, value, expr);
1070 /* successor might have phi nodes */
1071 if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1072 succ = get_Block_cfg_out(block, 0);
1073 int pos = get_Block_cfgpred_pos(succ, block);
1074 succ_info = get_block_info(succ);
1076 /* initialize translated set */
1077 if (env->first_iter) {
1078 info->trans = XMALLOC(ir_nodehashmap_t);
1079 ir_nodehashmap_init(info->trans);
1082 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1083 ir_node *trans = get_translated(block, expr);
1084 ir_node *trans_value;
1088 trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
1090 /* create new value if necessary */
1091 trans_value = identify_or_remember(trans);
1093 DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1095 /* On value change (phi present) we need the translated node
1096 to represent the new value for possible further translation. */
1097 if (value != trans_value)
1102 if (is_clean_in_block(expr, block, info->antic_in)) {
1104 /* Prevent information flow over the backedge of endless loops. */
/* NOTE(review): the guard below reads as the opposite of the comment above
   (it admits backedges of non-infinite loops) — verify against full source. */
1105 if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
1106 ir_valueset_replace(info->antic_in, trans_value, represent);
1109 ir_valueset_replace(info->antic_in, trans_value, represent);
1112 set_translated(info->trans, expr, represent);
1115 } else if (n_succ > 1) {
1117 ir_node *common = NULL;
1118 ir_node *succ0 = get_Block_cfg_out(block, 0);
1119 block_info *succ0_info = get_block_info(succ0);
1121 /* disjoint of antic_ins */
1122 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1123 /* iterate over remaining successors */
1124 for (i = 1; i < n_succ; ++i) {
1125 ir_node *succ = get_Block_cfg_out(block, i);
1126 block_info *succ_info = get_block_info(succ);
1128 /* value in antic_in? */
1129 common = ir_valueset_lookup(succ_info->antic_in, value);
1134 if (common && is_clean_in_block(expr, block, info->antic_in))
1135 ir_valueset_replace(info->antic_in, value, expr);
1140 DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
/* flag a change so the surrounding fixed-point loop iterates again */
1142 if (size != ir_valueset_size(info->antic_in))
1146 /* --------------------------------------------------------
1147 * Main algorithm Avail_out
1148 * --------------------------------------------------------
1152 * Computes Avail_out(block):
1154 * Avail_in(block) = Avail_out(dom(block))
1155 * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1158 * This function must be called in the top-down topological order:
1159 * Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1161 * @param block the block
1162 * @param ctx walker context
1164 static void compute_avail_top_down(ir_node *block, void *ctx)
1166 pre_env *env = (pre_env*)ctx;
1169 if (block == env->end_block)
1172 info = get_block_info(block);
1174 /* Add all nodes from the immediate dominator.
1175 This ensures that avail_out contains the leader. */
1176 if (block != env->start_block) {
1177 ir_node *dom_block = get_Block_idom(block);
1178 block_info *dom_info = get_block_info(dom_block);
1181 ir_valueset_iterator_t iter;
1183 foreach_valueset(dom_info->avail_out, value, expr, iter)
1184 /* replace: use the leader from dominator, not local exp_gen */
1185 ir_valueset_replace(info->avail_out, value, expr);
1188 DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1191 /* --------------------------------------------------------
1192 * Main algorithm redundancy detection
1193 * --------------------------------------------------------
1197 * Returns a valid mode if the value of expr is a partially redundant value.
1199 * @param block the block
1200 * @param expr the expression
1202 * @return mode of the expression if it is partially redundant else NULL
1204 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1206 ir_node *first_avail = NULL;
1208 int arity = get_irn_arity(block);
1209 int fully_redundant = 1;
1210 int partially_redundant = 0;
1211 ir_mode *mode = NULL;
1213 DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1215 /* for each predecessor blocks */
1216 for (pos = 0; pos < arity; ++pos) {
1217 ir_node *pred_block = get_Block_cfgpred_block(block, pos);
1218 block_info *pred_info;
1219 ir_node *trans_expr;
1220 ir_node *trans_value;
1221 ir_node *avail_expr;
1223 pred_info = get_block_info(pred_block);
1224 trans_expr = get_translated(pred_block, expr);
1225 trans_value = identify(trans_expr);
1227 if (is_Const(trans_expr))
1228 avail_expr = trans_expr;
1230 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1232 /* value might be available through a not yet existing constant */
/* NOTE(review): avail_expr was just set to trans_expr for Const nodes above,
   so this guard looks unreachable — verify against the full source. */
1233 if (avail_expr == NULL && is_Const(trans_expr)) {
1234 /* limit range of new constants */
1235 ir_mode *cmode = get_irn_mode(trans_expr);
1236 ir_tarval *upper = new_tarval_from_long(127, cmode);
1237 ir_tarval *lower = new_tarval_from_long(-127, cmode);
1238 ir_tarval *c = get_Const_tarval(trans_expr);
1240 /* tarval within range? */
1241 if (tarval_cmp(lower, c) == ir_relation_less_equal &&
1242 tarval_cmp(c, upper) == ir_relation_less_equal) {
1243 avail_expr = trans_expr;
1249 DB((dbg, LEVEL_3, "avail_expr %+F trans_expr %+F\n", avail_expr, trans_expr));
1251 if (avail_expr == NULL) {
1252 pred_info->avail = trans_expr;
1253 pred_info->found = 0;
1254 fully_redundant = 0;
1256 /* expr is available, use the leader */
1257 pred_info->avail = avail_expr;
1258 pred_info->found = 1;
1259 mode = get_irn_mode(avail_expr);
1260 partially_redundant = 1;
1262 if (first_avail == NULL)
1263 first_avail = avail_expr;
1264 else if (first_avail != avail_expr)
1265 /* Multiple different expressions are available,
1266 This is why we need no cut over avail_out sets. */
1267 fully_redundant = 0;
1269 DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1274 /* value is redundant from last iteration,
1275 but has not been removed from antic_in (is not optimized) */
1276 if (! environ->first_iter && is_redundant(block, expr))
1280 /* If it is not the same value already existing along every predecessor
1281 and it is defined by some predecessor then it is partially redundant. */
1282 if (! partially_redundant || fully_redundant)
1288 * Updates the new_set of a block by adding the new_set of
1289 * the immediate dominating block.
1293 static void update_new_set(ir_node *block, ir_node *idom)
1297 ir_valueset_iterator_t iter;
1298 block_info *curr_info = get_block_info(block);
1299 block_info *idom_info = get_block_info(idom);
1302 DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1303 foreach_valueset(idom_info->new_set, value, expr, iter) {
1304 /* inherit new_set from immediate dominator */
1305 ir_valueset_insert(curr_info->new_set, value, expr);
1306 /* replace in avail_out */
1307 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1309 #ifdef DEBUG_libfirm
1311 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1313 } /* update_new_set */
1317 * Returns redundant flag of node irn in block block.
1319 static unsigned is_redundant(ir_node *block, ir_node *irn)
1324 /* TODO Needs to use a flag, because antic_done should only be used
1325 if node is finally processed by insert_nodes. */
/**
 * Checks if hoisting irn is greedy.
 * Greedy hoisting means that there are non partially redundant nodes
 * hoisted. This happens if a partially redundant node has
 * non redundant predecessors.
 *
 * For every predecessor block of block, every operand of irn is checked:
 * its phi-translated value must be constant-like or already available in
 * the predecessor's avail_out set; otherwise hoisting would pull in extra,
 * non-redundant computations.
 */
static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
	int block_arity = get_irn_arity(block);
	int arity = get_irn_arity(irn);
	block_info *info = get_block_info(block);

	/* As long as the predecessor values are available in all predecessor blocks,
	   we can hoist this value. */
	for (pos = 0; pos < block_arity; ++pos) {
		ir_node *pred_block = get_Block_cfgpred_block(block, pos);
		block_info *pred_info = get_block_info(pred_block);

		for (i = 0; i < arity; ++i) {
			ir_node *pred = get_irn_n(irn, i);

			/* Very conservative min cut. Phi might only have 1 user. */
			if (is_Phi(pred) && get_irn_n_edges(pred) != 1)

			if (is_Phi(pred) && get_nodes_block(pred) == block)

			DB((dbg, LEVEL_3, "pred %+F\n", pred));
			value = identify(pred);
			leader = ir_valueset_lookup(info->antic_in, value);
			DB((dbg, LEVEL_3, "lead %+F\n", leader));
			trans = get_translated(pred_block, leader);
			DB((dbg, LEVEL_3, "trans %+F\n", trans));

			trans_val = identify(trans);
			DB((dbg, LEVEL_3, "value %+F\n", trans_val));

			if (is_Const(trans_val) || is_SymConst(trans_val)) {
				/* existing constant */
				if (get_irn_idx(trans_val) < environ->last_idx) {
					/* limit range of new constants */
					ir_mode *cmode = get_irn_mode(trans);
					ir_tarval *upper = new_tarval_from_long(128, cmode);
					ir_tarval *lower = new_tarval_from_long(-128, cmode);
					ir_tarval *c = get_Const_tarval(trans);

					/* tarval within range? */
					if (tarval_cmp(lower, c) == ir_relation_less &&
					    tarval_cmp(c, upper) == ir_relation_less) {

			if (is_irn_constlike(trans_val))

			avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
			DB((dbg, LEVEL_3, "avail %+F\n", avail));

	/* only optimize if predecessors have been optimized */
	if (ir_valueset_lookup(info->antic_done, value) == NULL)
/**
 * Perform insertion of partially redundant values.
 * For every block node, do the following:
 * 1. Propagate the NEW_SETS of the dominator into the current block.
 * If the block has multiple predecessors,
 * 2a. Iterate over the ANTIC expressions for the block to see if
 *     any of them are partially redundant.
 * 2b. If so, insert them into the necessary predecessors to make
 *     the expression fully redundant.
 * 2c. Insert a new Phi merging the values of the predecessors.
 * 2d. Insert the new Phi, and the new expressions, into the
 *     NEW_SETS set.
 *
 * @param block  the block
 * @param ctx    the walker environment
 */
static void insert_nodes_walker(ir_node *block, void *ctx)
	pre_env *env = (pre_env*)ctx;
	int arity = get_irn_arity(block);
	ir_valueset_iterator_t iter;

	if (! is_Block(block))

	/* ensure that even the start block has a new_set */
	info = get_block_info(block);
	ir_valueset_del(info->new_set);
	info->new_set = ir_valueset_new(16);

	if (block == env->start_block)

	DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));

	idom = get_Block_idom(block);
	update_new_set(block, idom);

	/* process only path joining blocks */

	stack = plist_new();
	foreach_valueset(info->antic_in, value, expr, iter) {
		/* inverse topological order */
		plist_insert_front(stack, expr);

	/* This is the main reason antic_in is preferred over antic_out;
	   we may iterate over every anticipated value first and not
	   over the predecessor blocks. */
	foreach_valueset(info->antic_in, value, expr, iter) {

		/* values handled in an earlier iteration need no further work */
		if (ir_valueset_lookup(info->antic_done, value))

		/* filter phi nodes from antic_in */

		DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));

		/* A value computed in the dominator is totally redundant.
		   Hence we have nothing to insert. */
		if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
			DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
			DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)

			ir_valueset_insert(info->antic_done, value, expr);

		if (is_hoisting_greedy(expr, block)) {
			DB((dbg, LEVEL_2, "greedy\n"));

		mode = is_partially_redundant(block, expr, value);

		if (is_hoisting_greedy(expr, block)) {
			DB((dbg, LEVEL_2, "Better greed: greedy\n"));

#if LOADS || OLD_DIVMODS || DIVMODS
		/* save old mode_M phis to remove keepalive edges later */
		if (is_memop(expr)) {
			ir_node *mem = get_memop_mem(expr);
			if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
				ir_nodeset_insert(env->keeps, mem);

#ifdef DEBUG_libfirm
		/* statistics are only collected in debug builds */
		if (! is_Proj(expr)) {
			if (env->first_iter)
				inc_stats(gvnpre_stats->first_iter_found);
			inc_stats(gvnpre_stats->partially);
		if (is_Load(expr) || is_Store(expr))
			inc_stats(gvnpre_stats->loads);
		else if (is_Div(expr) || is_Mod(expr))
			inc_stats(gvnpre_stats->divmods);

		phi_in = XMALLOCN(ir_node *, arity);

		/* for each predecessor block */
		for (pos = 0; pos < arity; ++pos) {
			ir_node *pred_block = get_Block_cfgpred_block(block, pos);
			block_info *pred_info;

			pred_info = get_block_info(pred_block);

			if (! pred_info->found) {
				/* expr is not available here: build a copy in the predecessor */
				int node_arity = get_irn_arity(expr);
				ir_node **in = XMALLOCNZ(ir_node *, node_arity);
				ir_node *new_value, *new_value2;
				ir_node *target_block = pred_block;

				for (i = 0; i < node_arity; ++i) {
					ir_node *pred = get_irn_n(expr, i);
					ir_node *value = identify(pred);

					/* transform knowledge over the predecessor from
					   anti-leader world into leader world. */

					DB((dbg, LEVEL_3, "pred %+F\n", pred));
					value = identify(pred);

					/* get leader for pred to lookup its translated value */
					leader = ir_valueset_lookup(info->antic_in, value);
					DB((dbg, LEVEL_3, "lead %+F\n", leader));

					trans = get_translated(pred_block, leader);
					DB((dbg, LEVEL_3, "trans %+F\n", trans));

					/* in case of phi, we are done */
					if (is_Phi(pred) && get_nodes_block(pred) == block) {

					trans_val = identify(trans);
					DB((dbg, LEVEL_3, "value %+F\n", trans_val));

					/* constants are always available but not in avail set */
					if (is_irn_constlike(trans_val)) {

					   In case of loads we need to make sure the hoisted
					   loads are found despite their unique value. */
					avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
					DB((dbg, LEVEL_3, "avail %+F\n", avail));

					assert(avail && "predecessor has to be available");

				target_block = get_nodes_block(in[0]);

				/* Copy node to represent the new value.
				   We use translated nodes as value representatives only.
				   They have anti leaders as predecessors, not leaders!
				   So we have to create a new node using leaders. */
				trans = new_ir_node(
					get_irn_dbg_info(expr),
					get_irn_arity(expr),
				/* We need the attribute copy here, because the Hash value of a
				   node might depend on it. */
				copy_node_attr(environ->graph, expr, trans);

				/* value is now available in target block through trans
				   insert (not replace) because it has not been available */
				new_value = identify_or_remember(trans);
				ir_valueset_insert(pred_info->avail_out, new_value, trans);
				DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));

				new_value2 = identify(get_translated(pred_block, expr));
				ir_valueset_insert(pred_info->avail_out, new_value2, trans);
				DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));

				DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));

				phi_in[pos] = trans;
				/* value available */
				phi_in[pos] = pred_info->avail;
			DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));

		/* We do not connect tuples as they will be connected automatically
		   by the corresponding projections. */
		if (get_irn_mode(expr) != mode_T) {

			phi = new_r_Phi(block, arity, phi_in, mode);
			DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));

			/* This value is now available through the new phi.
			   insert || replace in avail_out */
			ir_valueset_replace(info->avail_out, value, phi);
			ir_valueset_insert(info->new_set, value, phi);

		/* already optimized this value in this block */
		ir_valueset_insert(info->antic_done, value, expr);

	/* NOTE(review): BETTER_GREED sketch below — see #define BETTER_GREED.
	   Better greed first determines which values are redundant
	   and decides then which to take.
	   insert_nodes needs to be split up for that. The cycle could be:
	   for each block: flag redundant nodes,
	   use heuristic to adjust these flags (also consider antic_done),
	   This way we could decide if we should hoist a non redundant node,
	   if all its successors are redundant.
	   Or we might try to minimize the cut along hoisted nodes and their
	   non redundant successors. */

		plist_element_t *it;
		/* iterate in inverse topological order */
		foreach_plist(stack, it) {
			ir_node *irn = (ir_node *)plist_element_get_value(it);
			ir_node *block = get_nodes_block(irn);

			/* does irn only have redundant successors? */

			foreach_out_edge(irn, edge) {
				ir_node *succ = get_edge_src_irn(edge);

				/* if succ and irn are in the same block */
				if (get_nodes_block(succ) == block && is_redundant(block, succ)) {

				flag_redundant(irn, 1);
/**
 * Block walker which propagates new_set information: for every block
 * except the start block, merges the new_set of its immediate dominator
 * via update_new_set().
 *
 * @param block  the visited node (only blocks are processed)
 * @param ctx    the walker environment (pre_env)
 */
static void update_new_set_walker(ir_node *block, void *ctx)
	pre_env *env = (pre_env*)ctx;

	if (! is_Block(block))

	if (block == env->start_block)

	update_new_set(block, get_Block_idom(block));
/**
 * Domtree block walker to insert nodes with dying operands
 * into the highest possible block whilst still being anticipated.
 *
 * For every expression optimized by the insert phase (antic_done), this
 * walks up the dominance tree from each predecessor block and moves the
 * hoisted copy higher as long as it stays anticipated, does not enter a
 * deeper loop, and all its operands are available and die at the node.
 */
static void hoist_high(ir_node *block, void *ctx)
	pre_env *env = (pre_env*)ctx;
	block_info *curr_info;
	ir_valueset_iterator_t iter;
	int arity = get_irn_arity(block);

	if (! is_Block(block))

	curr_info = get_block_info(block);

	if (curr_info->new_set)
		ir_valueset_del(curr_info->new_set);
	curr_info->new_set = ir_valueset_new(16);

	if (block == env->start_block)

	DB((dbg, LEVEL_2, "High hoisting %+F\n", block));

	/* foreach entry optimized by insert node phase */
	foreach_valueset(curr_info->antic_done, value, expr, iter) {

		/* TODO currently we cannot handle load and their projections */
		if (is_memop(expr) || is_Proj(expr))

		DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));

		/* visit hoisted expressions */
		for (pos = 0; pos < arity; ++pos) {
			/* standard target is predecessor block */
			ir_node *target = get_Block_cfgpred_block(block, pos);
			block_info *pred_info = get_block_info(target);
			ir_node *new_target;
			ir_node *trans_expr;
			ir_node *trans_value;
			unsigned nest_depth;
			block_info *dom_info;

			/* get phi translated value */
			trans_expr = get_translated(target, expr);
			trans_value = identify(trans_expr);
			avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);

			/* get the used expr on this path */

			/* TODO when does this happen? */

			avail_arity = get_irn_arity(avail);
			value = identify(avail);

			/* anticipation border */

			nest_depth = get_loop_depth(get_irn_loop(target));

			/* Either push the hoisted nodes up their path,
			   or try to put them directly into their common dominator. */

			/* By using block (instead of target) as initial block,
			   we only allow hoisting into a common block of
			   both predecessor blocks. */

			while (dom && dom != get_Block_idom(block)) {

				dom = get_Block_idom(dom);
				dom_info = get_block_info(dom);
				DB((dbg, LEVEL_4, "testing dom %+F\n", dom));

				/* TODO Being in antic_in means hoistable above block,
				   but we need 'hoistable into block'.
				   This could be achieved by a flag for each valueset pair,
				   being set during antic computation. */

				/* check if available node is still anticipated and clean */
				if (! ir_valueset_lookup(dom_info->antic_in, value)) {
					DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));

				nest_depth = get_loop_depth(get_irn_loop(dom));

				/* do not hoist into loops */
				if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
					DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
					/* not a suitable location */

				/* check if operands die */

				/* check for uses on current path */
				for (i = 0; i < avail_arity; i++) {
					ir_node *pred = get_irn_n(avail, i);
					ir_node *pred_value = identify(pred);

					DB((dbg, LEVEL_4, "testing pred %+F\n", pred));

					if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
						DB((dbg, LEVEL_4, "pred %+F not available\n", pred));

					/* check every successor */
					foreach_out_edge(pred, edge) {
						ir_node *succ = get_edge_src_irn(edge);
						DB((dbg, LEVEL_4, "testing succ %+F\n", succ));

						/* check only successors on current path to end */
						if (block_dominates(dom, get_nodes_block(succ))) {
							ir_node *succ_value = identify(succ);

							/* Do we have another user than avail?
							   Then predecessor is not dead after removal of avail. */
							if (succ_value != value) {
								DB((dbg, LEVEL_4, "still used in %+F\n", succ));

				/* only try common dominator */

			/* put node into new target block */
				block_info *target_info = get_block_info(new_target);
				int nn_arity = get_irn_arity(avail);
				ir_node **in = XMALLOCN(ir_node *, nn_arity);

				DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
				DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)

				for (i = 0; i < nn_arity; ++i) {
					ir_node *pred = get_irn_n(avail, i);
					ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));

					get_irn_dbg_info(avail),
					get_irn_mode(avail),

				identify_or_remember(nn);
				/* TODO Nodes are inserted into a dominating block and should
				   be available from this point on. Currently we do not push
				   the availability information through during the walk. */
				ir_valueset_insert(target_info->new_set, value, nn);
				ir_valueset_insert(target_info->avail_out, value, nn);
/* --------------------------------------------------------
 * Elimination of fully redundant nodes
 * --------------------------------------------------------
 */

/**
 * Walker which finds redundant nodes using avail_out sets
 * and exchanges them for existing ones.
 * We cannot change the graph here as this would affect
 * the hash values of the nodes. The pairs are recorded and
 * replaced later by eliminate_nodes().
 *
 * @param irn  the node
 * @param ctx  the walker environment
 */
static void eliminate(ir_node *irn, void *ctx)
	pre_env *env = (pre_env*)ctx;

	if (! is_Block(irn)) {
		ir_node *block = get_nodes_block(irn);
		block_info *info = get_block_info(block);
		ir_node *value = identify(irn);

		if (value != NULL) {
			ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
			DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));

			if (expr != NULL && expr != irn) {
				/* record the replacement for later */
				elim_pair *p = OALLOC(env->obst, elim_pair);

				p->next = env->pairs;
				/* nodes created after the initial last_idx stem from PRE insertion */
				if (get_irn_idx(expr) > env->last_idx)
					p->reason = FS_OPT_GVN_PARTLY;
					p->reason = FS_OPT_GVN_FULLY;

				DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
/**
 * Do all the recorded changes and optimize
 * newly created Phi's.
 *
 * @param pairs  list of elimination pairs recorded by eliminate()
 * @param keeps  set of mode_M phis whose keep-alive edges are removed
 */
static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
	ir_node *end = environ->end_node;

	for (p = pairs; p != NULL; p = p->next) {
		/* might be already changed */
		p->new_node = skip_Id(p->new_node);

		DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));

		/* PRE tends to create Phi(self, self, ... , x, self, self, ...)
		 * which we can optimize here */
		if (is_Phi(p->new_node)) {
			ir_node *res = NULL;

			for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
				ir_node *pred = get_irn_n(p->new_node, i);

				if (pred != p->old_node) {

				exchange(p->new_node, res);

		DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
		exchange(p->old_node, p->new_node);

	/* remove keep alive edges of unused mode_M phis */
	foreach_ir_nodeset(keeps, m_phi, iter) {
		remove_End_keepalive(end, m_phi);
} /* eliminate_nodes */
/* --------------------------------------------------------
 * GVN-PRE pass
 * --------------------------------------------------------
 */

/**
 * Gvn_Pre algorithm: drives all phases in order — block info setup,
 * exp_gen/avail_out computation, iterated antic_in computation,
 * iterated insertion of partially redundant values, high hoisting,
 * and final elimination of redundant nodes.
 *
 * @param irg  the graph
 * @param env  the environment
 */
static void gvn_pre(ir_graph *irg, pre_env *env)
	unsigned antic_iter;
	unsigned insert_iter;

	DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));

	/* allocate block info */
	irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);

	ir_nodehashmap_init(&value_map);

	/* generate exp_gen */
	irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
	dump_all_expgen_sets(env->list);

	/* compute the avail_out sets for all blocks */
	dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);

	/* compute the anticipated value sets for all blocks */
	env->first_iter = 1;

	/* antic_in passes: iterate until a fixpoint or MAX_ANTIC_ITER */
		DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
		irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
		env->first_iter = 0;
		DB((dbg, LEVEL_2, "----------------------------------------------\n"));
	} while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
	DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)

	ir_nodeset_init(env->keeps);

	env->first_iter = 1;
	/* compute redundant expressions: iterate until a fixpoint or MAX_INSERT_ITER */
		DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
		/* TODO topologically top down would be better; fewer iterations. */
		dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
		env->first_iter = 0;
		DB((dbg, LEVEL_2, "----------------------------------------------\n"));
	} while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
	DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)

	/* An attempt to reduce lifetimes by hoisting already hoisted values
	   even higher if their operands die. */
	dom_tree_walk_irg(irg, hoist_high, NULL, env);
	/* update avail_out for elimination */
	dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);

	/* Deactivate edges to prevent intelligent removal of nodes,
	   or else we will get deleted nodes which we try to exchange. */
	edges_deactivate(environ->graph);

	/* eliminate nodes */
	irg_walk_graph(irg, NULL, eliminate, env);
	eliminate_nodes(env->pairs, env->keeps);

	ir_nodeset_destroy(env->keeps);
/**
 * Gvn_Pre pass for graph irg.
 *
 * Public entry point: establishes the required graph properties,
 * sets up the environment and value table, runs gvn_pre() and
 * restores the optimization state afterwards.
 *
 * @param irg  the graph
 */
void do_gvn_pre(ir_graph *irg)
	struct obstack obst;
	optimization_state_t state;
	block_info *block_info;

	/* bads and unreachables cause too much trouble with dominance,
	   loop info for endless loop detection,
	   no critical edges is PRE precondition
	 */
	assure_irg_properties(irg,
		IR_GRAPH_PROPERTY_NO_BADS
		| IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
		| IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
		| IR_GRAPH_PROPERTY_CONSISTENT_OUTS
		| IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
		| IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);

	/* register a debug mask */
	FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");

	save_optimization_state(&state);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);

	edges_activate(irg);

	DEBUG_ONLY(init_stats();)

	/* setup environment */
	obstack_init(&obst);
	env.start_block = get_irg_start_block(irg);
	env.end_block = get_irg_end_block(irg);
	env.end_node = get_irg_end(irg);
	env.last_idx = get_irg_last_idx(irg);

	/* Detect and set links of infinite loops to non-zero. */

	new_identities(irg);
	/* stash the original value table; it is restored after the pass */
	env.value_table = irg->value_table;
	irg->value_table = NULL;

	/* Switch on GCSE. We need it to correctly compute
	   the value of a node, which is independent from
	   its block. */
	set_opt_global_cse(1);
	/* new_identities() */
	if (irg->value_table != NULL)
		del_pset(irg->value_table);
	/* initially assumed nodes in pset are 512 */
	irg->value_table = new_pset(compare_gvn_identities, 512);
	env.gvnpre_values = irg->value_table;

	/* do GVN-PRE pass */

	DEBUG_ONLY(print_stats();)

	/* clean up: delete all sets */
	for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
		free_block_info(block_info);

	DEBUG_ONLY(free_stats();)
	ir_nodehashmap_destroy(&value_map);
	obstack_free(&obst, NULL);
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);

	/* Pin the graph again.
	   This is needed due to the use of set_opt_global_cse(1) */
	set_irg_pinned(irg, op_pin_state_pinned);
	restore_optimization_state(&state);
	confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);

	/* swap the GVN value table out and restore the saved one */
	irg->value_table = env.value_table;
	del_pset(irg->value_table);
	irg->value_table = env.gvnpre_values;

	/* TODO There seem to be optimizations that try to use the existing
	   value table. */
	new_identities(irg);

	/* TODO assure nothing else breaks. */
	set_opt_global_cse(0);
	edges_activate(irg);
2210 /* Creates an ir_graph pass for do_gvn_pre. */
2211 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2213 return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);