Fixed warning: Made global variable static.
[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Global Value Numbering Partial Redundancy Elimination
23  *          (VanDrunen Hosking 2004)
24  * @author  Michael Beck
25  * @brief
26  */
27 #include "config.h"
28
29 #include "debug.h"
30 #include "ircons.h"
31 #include "irdom.h"
32 #include "iredges.h"
33 #include "irflag.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
41 #include "irouts.h"
42 #include "irpass.h"
43 #include "valueset.h"
44 #include "irloop.h"
45
46 #include "irgraph_t.h"
47 #include "irnode_t.h"
48 #include "iropt_t.h"
49 #include "plist.h"
50
51 /* suggested by GVN-PRE authors */
52 #define MAX_ANTIC_ITER 10
53 #define MAX_INSERT_ITER 3
54
55 /* Stops antic iteration from processing endless loops. */
56 #define IGNORE_INF_LOOPS 1
57 /* Stops antic information to flow over infinite loop backedge */
58 #define NO_INF_LOOPS 0
59
60 /* Attempt to reduce register pressure and reduce code size
61    for hoisted nodes. */
62 #define HOIST_HIGH 1
63 #define COMMON_DOM 1
64
65 /* Seamless implementation of handling loads and generally memory
66    dependent nodes with GVN-PRE. */
67 #define LOADS 1
68 #define DIVMODS 0
69
70 /* Experimental */
71
72 /* Only optimize node directly after phi node if node is only successor. */
73 #define MIN_CUT 0
74
75 /* GVN is very limited. This enables optimize_node during value recognition.
76    GVN ist still very limited, since a+1+b+1 doesn't equal a+b+2.
77    TODO Broken for yet unknown reasons. */
78 #define OPTIMIZE_NODES 0
79
80
81 /** Additional info we need for every block. */
82 typedef struct block_info {
83         ir_valueset_t     *exp_gen;    /* contains this blocks clean expressions */
84         ir_valueset_t     *avail_out;  /* available values at block end */
85         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
86         ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert nodes phase */
87         ir_valueset_t     *new_set;    /* new by hoisting made available values */
88         ir_nodehashmap_t  *trans;      /* contains translated nodes translated into block */
89         ir_node           *avail;      /* saves available node for insert node phase */
90         int                found;      /* saves kind of availability for insert_node phase */
91         ir_node           *block;      /* block of the block_info */
92         struct block_info *next;       /* links all instances for easy access */
93 } block_info;
94
95 /**
96  * A pair of nodes to be exchanged.
97  * We have to defer the exchange because there are still needed references
98  * to certain nodes.
99  */
100 typedef struct elim_pair {
101         ir_node *old_node;      /* node that will be replaced */
102         ir_node *new_node;      /* replacement for old_node */
103         struct elim_pair *next; /* links all instances for easy access */
104         int     reason;         /* reason for the replacement */
105 } elim_pair;
106
107 /** environment for the GVN-PRE algorithm */
108 typedef struct pre_env {
109         ir_graph       *graph;        /* current graph */
110         struct obstack *obst;         /* obstack to allocate on */
111         ir_node        *start_block;  /* start block of the current graph */
112         ir_node        *end_block;    /* end block of the current graph */
113         ir_node        *end_node;     /* end node of the current graph */
114         block_info     *list;         /* block_info list head */
115         elim_pair      *pairs;        /* elim_pair list head */
116         ir_nodeset_t   *keeps;        /* a list of to be removed phis to kill their keep alive edges */
117         unsigned        last_idx;     /* last node index of input graph */
118         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
119         char            first_iter;   /* non-zero for first fixed point iteration */
120         int             iteration;    /* iteration counter */
121 #if OPTIMIZE_NODES
122         pset           *value_table;   /* standard value table*/
123         pset           *gvnpre_values; /* gvnpre value table */
124 #endif
125 } pre_env;
126
127 static pre_env *environ;
128
129 /* custom GVN value map */
130 static ir_nodehashmap_t value_map;
131
132 /* debug module handle */
133 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
134
135 #ifdef DEBUG_libfirm
136
137 /* --------------------------------------------------------
138  * Statistics
139  * --------------------------------------------------------
140  */
141
142 typedef struct gvnpre_statistics {
143         int replaced;
144         int partially;
145         int fully;
146         int loads;
147         int divmods;
148         int hoist_high;
149         int first_iter_found;
150         int antic_iterations;
151         int insert_iterations;
152         int infinite_loops;
153 } gvnpre_statistics;
154
155 static gvnpre_statistics *gvnpre_stats = NULL;
156
157 static void init_stats(void)
158 {
159         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
160 }
161
162 static void free_stats(void)
163 {
164         free(gvnpre_stats);
165         gvnpre_stats = NULL;
166 }
167
168 static void print_stats(void)
169 {
170         gvnpre_statistics *stats = gvnpre_stats;
171         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
172         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
173         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
174         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
175         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
176         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
177         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
178         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
179         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
180         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
181 }
182
183 #define set_stats(var, value) (var)=(value)
184 #define inc_stats(var)        ((var)+=1)
185
186 /* --------------------------------------------------------
187  * Dump valuesets
188  * --------------------------------------------------------
189  */
190
191 /**
192  * Dump a value set.
193  *
194  * @param set    the set to dump
195  * @param txt    a text to describe the set
196  * @param block  the owner block of the set
197  */
198 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
199 {
200         ir_valueset_iterator_t iter;
201         ir_node *value, *expr;
202         int i;
203
204         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
205         i = 0;
206         foreach_valueset(set, value, expr, iter) {
207                 if ((i & 3) == 3)
208                         DB((dbg, LEVEL_2, "\n"));
209                 if (value != expr)
210                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
211                 else
212                         DB((dbg, LEVEL_2, " %+F,", expr));
213                 ++i;
214         }
215         DB((dbg, LEVEL_2, "\n}\n"));
216 }  /* dump_value_set */
217
218 /**
219  * Dump all exp_gen value sets.
220  *
221  * @param list  the list of block infos to retrieve the sets from
222  */
223 static void dump_all_expgen_sets(block_info *list)
224 {
225         block_info *block_info;
226
227         for (block_info = list; block_info != NULL; block_info = block_info->next) {
228                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
229         }
230 }
231
232 #else
233
234 #define dump_all_expgen_sets(list)
235 #define dump_value_set(set, txt, block)
236
237 #endif /* DEBUG_libfirm */
238
239 /* --------------------------------------------------------
240  * GVN Functions
241  * --------------------------------------------------------
242  */
243
244 /**
245  * Compares node collisions in valuetable.
246  * Modified identities_cmp().
247  */
248 static int compare_gvn_identities(const void *elt, const void *key)
249 {
250         ir_node *a = (ir_node *)elt;
251         ir_node *b = (ir_node *)key;
252         int i, irn_arity_a;
253
254         if (a == b) return 0;
255
256         /* phi nodes kill predecessor values and are always different */
257         if (is_Phi(a) || is_Phi(b))
258                 return 1;
259
260         /* memops are not the same, even if we want to optimize them
261            we have to take the order in account */
262         if (is_memop(a) || is_memop(b) ||
263             get_irn_mode(a) == mode_T ||
264             get_irn_mode(b) == mode_T) {
265                 /* Loads with the same predecessors are the same value;
266                    this should only happen after phi translation. */
267                 if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
268                         return 1;
269         }
270
271         if ((get_irn_op(a) != get_irn_op(b)) ||
272             (get_irn_mode(a) != get_irn_mode(b))) return 1;
273
274         /* compare if a's in and b's in are of equal length */
275         irn_arity_a = get_irn_arity(a);
276         if (irn_arity_a != get_irn_arity(b))
277                 return 1;
278
279         /* blocks are never the same */
280         if (is_Block(a) || is_Block(b))
281                 return 1;
282
283         /* should only be used with gcse enabled */
284         assert(get_opt_global_cse());
285
286         /* compare a->in[0..ins] with b->in[0..ins] */
287         for (i = 0; i < irn_arity_a; ++i) {
288                 ir_node *pred_a = get_irn_n(a, i);
289                 ir_node *pred_b = get_irn_n(b, i);
290                 if (pred_a != pred_b) {
291                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
292                                 return 1;
293                 }
294         }
295
296         /*
297          * here, we already now that the nodes are identical except their
298          * attributes
299          */
300         if (a->op->ops.node_cmp_attr)
301                 return a->op->ops.node_cmp_attr(a, b);
302
303         return 0;
304 }
305
306 /**
307  * Identify does a lookup in the GVN valuetable.
308  * To be used when no new GVN values are to be created.
309  *
310  * @param e  a node representing an expression
311  * @return a node representing the value
312  */
313 static ir_node *identify(ir_node *irn)
314 {
315         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
316         if (value)
317                 return value;
318         /* irn represents a new value, so return the leader */
319         return identify_remember(irn);
320 }
321
322 /**
323  * remember() adds node irn to the GVN valuetable.
324  * Identify_remember only identifies values of nodes with the
325  * same predecessor nodes (not values). By creating a node from the predecessor
326  * values/leaders, a true valuetree is built. Phis kill their predecessor value,
327  * so no circular dependencies need to be resolved.
328  *
329  * TODO Improvement:
330  *      Maybe this could be implemented with a custom node hash that takes
331  *      phi nodes and true values (instead of predecessors) into account,
332  *      resulting in value numbers.
333  * TODO This unnecessarily also handles nodes like calls, which are never equal.
334  *
335  * @param irn  a node representing an expression
336  * @return     the value of the expression
337  */
338 static ir_node *remember(ir_node *irn)
339 {
340         int       arity   = get_irn_arity(irn);
341         int       i;
342         int       changed = 0;
343         ir_node **in      = XMALLOCN(ir_node *, arity);
344         ir_node  *value;
345
346         for (i = 0; i < arity; ++i) {
347                 ir_node *pred       = get_irn_n(irn, i);
348                 /* value and leader at the same time */
349                 ir_node *pred_value = identify(pred);
350
351                 /* phi will be translated anyway, so kill the predecessor values
352                    this also prevents circular dependencies */
353                 if (is_Phi(pred)) {
354                         /* every phi represents its own value */
355                         in[i] = pred;
356                         continue;
357                 }
358
359                 /* predecessor is not its value representation/the leader */
360                 if (pred != pred_value)
361                         changed = 1;
362                 in[i] = pred_value;
363         }
364
365         if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
366                 /* create representative for */
367                 ir_node *nn = new_ir_node(
368                         get_irn_dbg_info(irn),
369                         get_irn_irg(irn),
370                         get_nodes_block(irn),
371                         get_irn_op(irn),
372                         get_irn_mode(irn),
373                         get_irn_arity(irn),
374                         in);
375                 copy_node_attr(environ->graph, irn, nn);
376
377 #if OPTIMIZE_NODES
378                 /* TODO optimize_node() uses the valuetable and thus the custom
379                    identify_cmp() and will fail trying. */
380                 environ->graph->value_table = environ->value_table;
381                 nn = optimize_node(nn);
382                 environ->graph->value_table = environ->gvnpre_values;
383 #endif
384
385                 /* now the value can be determined because the
386                    predecessors are the leaders */
387                 value = identify_remember(nn);
388         } else {
389                 value = identify_remember(irn);
390         }
391         free(in);
392
393         DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
394         ir_nodehashmap_insert(&value_map, irn, value);
395
396         return value;
397 }
398
399 /**
400  * When the value map has been built we may lookup expressions
401  * and remember them if new.
402  */
403 static ir_node *identify_or_remember(ir_node *irn)
404 {
405         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
406         if (value)
407                 return value;
408         else
409                 return remember(irn);
410 }
411
412 /* --------------------------------------------------------
413  * Block info
414  * --------------------------------------------------------
415  */
416
417 /**
418  * Allocate block info for block block.
419  *
420  * @param block   the block
421  * @param env     the environment
422  */
423 static void alloc_block_info(ir_node *block, pre_env *env)
424 {
425         block_info *info = OALLOC(env->obst, block_info);
426
427         set_irn_link(block, info);
428         info->exp_gen    = ir_valueset_new(16);
429         info->avail_out  = ir_valueset_new(16);
430         info->antic_in   = ir_valueset_new(16);
431         info->antic_done = ir_valueset_new(16);
432         info->trans = XMALLOC(ir_nodehashmap_t);
433         ir_nodehashmap_init(info->trans);
434
435         info->new_set = NULL;
436         info->avail   = NULL;
437         info->block   = block;
438         info->found   = 1;
439
440         info->next = env->list;
441         env->list  = info;
442 }  /* alloc_block_info */
443
444 static void free_block_info(block_info *block_info)
445 {
446         ir_valueset_del(block_info->exp_gen);
447         ir_valueset_del(block_info->avail_out);
448         ir_valueset_del(block_info->antic_in);
449         if (block_info->trans) {
450                 ir_nodehashmap_destroy(block_info->trans);
451                 free(block_info->trans);
452         }
453         if (block_info->new_set)
454                 ir_valueset_del(block_info->new_set);
455 }
456
457 /**
458  * Bottom up walker that ensures that every block gets a block info.
459  *
460  * @param irn   the node
461  * @param ctx   the environment
462  */
463 static void block_info_walker(ir_node *irn, void *ctx)
464 {
465         if (is_Block(irn)) {
466                 pre_env *env = (pre_env*)ctx;
467                 alloc_block_info(irn, env);
468         }
469 }
470
471 /**
472  * Returns the block info of a block.
473  */
474 static block_info *get_block_info(ir_node *block)
475 {
476         return (block_info*)get_irn_link(block);
477 }
478
479 /* --------------------------------------------------------
480  * Infinite loop analysis
481  * --------------------------------------------------------
482  */
483
484 /**
485  * Walker to set block marks and loop links to 0.
486  */
487 static void clear_block_mark_loop_link(ir_node *block, void *env)
488 {
489         (void) env;
490
491         if (is_Block(block)) {
492                 set_Block_mark(block, 0);
493                 set_loop_link(get_irn_loop(block), NULL);
494         }
495 }
496
497 /**
498  * Returns non-zero if block is part of real loop loop.
499  */
500
501 static unsigned in_loop(ir_node *block, ir_loop *loop)
502 {
503         ir_loop *l     = get_irn_loop(block);
504         ir_loop *outer = get_irg_loop(environ->graph);
505
506         while (l != loop) {
507                 /* loop tree root is not a loop */
508                 if (l == NULL || l == outer)
509                         return 0;
510                 l = get_loop_outer_loop(l);
511         }
512         return 1;
513 }
514
515 /**
516  * Returns the outermost real loop of loop.
517  */
518 static ir_loop *get_loop_outermost(ir_loop *loop)
519 {
520         ir_loop *outer = get_irg_loop(environ->graph);
521         ir_loop *l     = loop;
522         ir_loop *last  = NULL;
523
524         while(l != outer) {
525                 last = l;
526                 l = get_loop_outer_loop(l);
527         }
528         return last;
529 }
530
531 /**
532  * Topologic bottom-up walker sets links of infinite loops to non-zero.
533  * Block marks are used to flag blocks reachable (from end) on the one hand,
534  * on the other hand they are set if the block is not part of an infinite loop.
535  */
536 static void infinite_loop_walker(ir_node *block, void *env)
537 {
538         int arity;
539         int i;
540         (void) env;
541
542         if (! is_Block(block))
543                 return;
544
545         /* start block has no predecessors */
546         if (block == environ->start_block)
547                 return;
548
549         arity = get_irn_arity(block);
550
551         /* block not part of a real loop: no infinite loop */
552         if (get_irn_loop(block) == get_irg_loop(environ->graph))
553                 set_Block_mark(block, 1);
554
555         if (get_Block_mark(block)) {
556                 /* reachable block: mark all cf predecessors */
557                 for (i = 0; i < arity; ++i) {
558                         ir_node *pred = get_Block_cfgpred_block(block, i);
559                         if (is_Bad(pred))
560                                 continue;
561                         set_Block_mark(pred, 1);
562                 }
563         } else {
564                 /* We are in a real loop and see an unreachable block. */
565                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
566
567                 /* flag loop as infinite */
568                 set_loop_link(outermost_loop, outermost_loop);
569                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
570
571                 /* The cf predecessors are unreachable, but can never be part of
572                    an infinite loop, because we just reached them. So we set the
573                    blockmark to prevent triggering the infinite loop detection. */
574
575                 /* passing information to the cf predecessors */
576                 for (i = 0; i < arity; ++i) {
577                         ir_node *pred = get_Block_cfgpred_block(block, i);
578
579                         if (is_Bad(pred))
580                                 continue;
581
582                         /* If our cf predecessor is in the same endless loop,
583                            it is also unreachable. */
584                         if (in_loop(pred, outermost_loop)) {
585                                 set_Block_mark(pred, 0);
586                         } else {
587                                 /* When we leave the unreachable loop, we artificially
588                                    declare the cf predecessor reachable. */
589                                 set_Block_mark(pred, 1);
590                         }
591                 }
592         }
593 }
594
595 /**
596  * Sets loop links of outermost infinite loops to non-zero.
597  */
598 static void analyse_loops(ir_graph *irg)
599 {
600         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
601
602         /* reset block mark and loop links */
603         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
604
605         /* mark end block reachable */
606         set_Block_mark(get_irg_end_block(irg), 1);
607         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
608
609         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
610 }
611
612 #if IGNORE_INF_LOOPS || NO_INF_LOOPS
613 /**
614  * Returns non-zero if block is part of an infinite loop.
615  */
616 static unsigned is_in_infinite_loop(ir_node *block)
617 {
618         ir_loop *loop;
619
620         assert(is_Block(block));
621         loop = get_irn_loop(block);
622         assert(loop);
623
624         loop = get_loop_outermost(loop);
625         if (loop)
626                 return (get_loop_link(loop) != NULL);
627         else
628                 return 0;
629 }
630 #endif
631
632 /* --------------------------------------------------------
633  * GVN-PRE Exp_gen
634  * --------------------------------------------------------
635  */
636
637 /**
638  * Returns non-zero if a node is movable and a possible candidate for PRE.
639  */
640 static unsigned is_nice_value(ir_node *n)
641 {
642         ir_mode *mode = get_irn_mode(n);
643
644         if (is_Phi(n))
645                 return 1;
646
647 #if LOADS || DIVMODS
648         if (is_Proj(n) && mode != mode_X && mode != mode_T)
649                 return 1;
650 #else
651         if (is_Proj(n))
652                 return 0;
653 #endif
654
655 #if LOADS
656         if (is_Load(n))
657                 return get_Load_volatility(n) == volatility_non_volatile;
658         if (is_Store(n))
659                 return get_Store_volatility(n) == volatility_non_volatile;
660 #endif
661
662         if (get_irn_pinned(n) == op_pin_state_pinned)
663                 return 0;
664
665         if (! mode_is_data(mode)) {
666                 if (! is_Div(n) && ! is_Mod(n))
667                         return 0;
668         }
669         return 1;
670 }
671
672 /**
673  * Checks if a node n is clean in block block for exp_gen.
674  *
675  * @param n      the node
676  * @param block  the block
677  * @return non-zero value for clean node
678  */
679 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
680 {
681         int         i, arity;
682
683         if (is_Phi(n))
684                 return 1;
685
686         if (! is_nice_value(n))
687                 return 0;
688
689 #if LOADS
690         /* filter loads with no phi predecessor from antic_in */
691         if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
692                 return 0;
693         if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
694                 return 0;
695 #endif
696
697 #if DIVMODS
698         if (is_Div(n)) {
699                 ir_node *mem = get_Div_mem(n);
700
701                 mem = skip_Pin(mem);
702
703                 if (! is_Phi(mem) && ! is_NoMem(mem))
704                         return 0;
705         }
706
707         if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
708                 return 0;
709 #endif
710
711         arity = get_irn_arity(n);
712         for (i = 0; i < arity; ++i) {
713                 ir_node *pred   = get_irn_n(n, i);
714                 ir_node *value;
715
716                 if (is_Phi(pred))
717                         continue;
718
719                 /* we only handle current block */
720                 if (get_nodes_block(pred) != block)
721                         continue;
722
723                 if (! is_nice_value(pred))
724                         return 0;
725
726                 value = identify(pred);
727                 if (! ir_valueset_lookup(valueset, value))
728                         return 0;
729
730         }
731         return 1;
732 }
733
734 /**
735  * Topological walker puts nodes in top-down topological order into exp_gen set.
736  * Assumed to walk blockwise and nodewise topologically top-down.
737  *
738  * @param irn    the node
739  * @param ctx    the environment
740  */
741 static void topo_walker(ir_node *irn, void *ctx)
742 {
743         ir_node    *block;
744         block_info *info;
745         ir_node    *value;
746         (void) ctx;
747
748         if (is_Block(irn))
749                 return;
750
751 #if OPTIMIZE_NODES
752         environ->graph->value_table = environ->value_table;
753         identify_remember(irn);
754         environ->graph->value_table = environ->gvnpre_values;
755 #endif
756
757         /* GVN step: remember the value. */
758         value = remember(irn);
759
760         if (is_irn_constlike(irn))
761                 return;
762
763         block = get_nodes_block(irn);
764         info  = get_block_info(block);
765
766         if (get_irn_mode(irn) != mode_X)
767                 ir_valueset_insert(info->avail_out, value, irn);
768
769         /* values that are not in antic_in also dont't need to be in any other set */
770
771         if (! is_nice_value(irn))
772                 return;
773
774         if (is_clean_in_block(irn, block, info->exp_gen)) {
775                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
776
777                 ir_valueset_insert(info->exp_gen, value, irn);
778         }
779 }
780
781 /* --------------------------------------------------------
782  * GVN-PRE Antic_in
783  * --------------------------------------------------------
784  */
785
786 /**
787  * Gets result of nodes phi translation into block.
788  *
789  * @param node   the node
790  * @param block  the target block
791  *
792  * @return a phi translation of node node into block block or NULL
793  */
794 static ir_node *get_translated(ir_node *block, ir_node *node)
795 {
796         if (is_irn_constlike(node))
797                 return node;
798
799         return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
800 }
801
802 /**
803  * Saves result of phi translation of node into predecessor
804  * at pos of block succ.
805  *
806  * @param node   the node
807  * @param succ   the successor of the translation target block
808  * @param pos    the position of the predecessor block
809  * @param trans  the translation result
810  *
811  */
812 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
813 {
814         if (is_irn_constlike(node))
815                 return;
816         /* insert or replace */
817         ir_nodehashmap_insert(map, node, trans);
818 }
819
820 /**
821  * Translates an expression above a Phi.
822  *
823  * @param node        the node
824  * @param block       the block the node is translated into
825  * @param pos         the input number of the destination block
826  *
827  * @return a node representing the translated value
828  */
829 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
830 {
831         int       i;
832         int       arity;
833         ir_node **in;
834         ir_node  *pred_block = get_Block_cfgpred_block(block, pos);
835         ir_node  *nn;
836         unsigned  needed;
837
838         if (is_Phi(node)) {
839                 if (get_nodes_block(node) == block)
840                         return get_Phi_pred(node, pos);
841                 /* this phi does not need translation */
842                 return node;
843         }
844         arity = get_irn_arity(node);
845
846         needed = 0;
847         in = ALLOCANZ(ir_node *, arity);
848
849         /* A value has several representatives. The anti leader is chosen to be
850            the main representative. If we access a node as representative of a
851            value we always use the anti leader. The anti leader can be found by
852            antic_in(identify(node)). */
853         for (i = 0; i < arity; ++i) {
854                 ir_node *pred   = get_irn_n(node, i);
855                 ir_node *value  = identify(pred);
856                 /* get leader for pred to lookup its translated value */
857                 ir_node *leader = ir_valueset_lookup(leaderset, value);
858                 ir_node *pred_trans;
859                 ir_node *new_pred;
860
861                 if (! leader)
862                         leader = pred;
863
864                 /* we cannot find this value in antic_in, because the value
865                    has (possibly) changed! */
866                 pred_trans  = get_translated(pred_block, leader);
867
868
869 #if DIVMODS
870                 if (is_Div(node)) {
871                         ir_node *mem = get_Div_mem(node);
872
873                         mem = skip_Pin(mem);
874
875                         if (! is_Phi(mem))
876                                 pred_trans = get_Div_mem(node);
877                 }
878 #endif
879
880                 DB((dbg, LEVEL_3, "trans %+F of %+F is  %+F\n", leader, pred_block, pred_trans));
881                 if (pred_trans == NULL) {
882                         new_pred = pred;
883                 } else {
884                         new_pred = pred_trans;
885
886                         /* loads: Predecessor is a memory phi, which translated yields a proj or
887                            another phi. In case of projection and a load predecessor,
888                            skip them and use the loads memory. */
889                         if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
890 #if LOADS || DIVMODS
891                                 ir_node *loadstore = get_Proj_pred(pred_trans);
892                                 /* If we do not translate this node, we will get its value wrong. */
893                                 needed |= 1;
894
895                                 if (is_Load(loadstore)) {
896                                         /* Put new load under the adjacent loads memory edge
897                                            such that GVN may compare them. */
898                                         new_pred = get_Load_mem(loadstore);
899                                 } else if (is_Store(loadstore)) {
900                                         new_pred = get_Store_mem(loadstore);
901                                 }
902 #endif
903                         } else {
904                                 /* predecessor value changed, so translation is needed */
905                                 if (identify(new_pred) != identify(pred))
906                                         needed |= 1;
907                         }
908                 }
909
910                 DB((dbg, LEVEL_4, "in %+F\n", new_pred));
911                 in[i] = new_pred;
912         }
913
914         if (! needed)
915                 return node;
916
917         DB((dbg, LEVEL_3, "Translate\n"));
918
919         if (is_Proj(node))
920                 pred_block = get_nodes_block(in[0]);
921
922         /* copy node to represent the new value.
923            We do not translate nodes that do not need translation,
924            so we use the newly created nodes as value representatives only.
925            Their block is not important, because we create new ones during
926            the insert node phase. */
927         nn = new_ir_node(
928                 get_irn_dbg_info(node),
929                 environ->graph,
930                 pred_block,
931                 get_irn_op(node),
932                 get_irn_mode(node),
933                 arity,
934                 in);
935         /* We need the attribute copy here, because the Hash value of a
936            node might depend on it. */
937         copy_node_attr(environ->graph, node, nn);
938         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
939            because it already uses availability. */
940
941         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
942         return nn;
943 }
944
945 /**
946  * Block-walker, computes Antic_in(block).
947  * Builds a value tree out of the graph by translating values
948  * over phi nodes.
949  *
950  * @param block  the block
951  * @param ctx    the walker environment
952  */
953 static void compute_antic(ir_node *block, void *ctx)
954 {
955         pre_env                *env       = (pre_env*)ctx;
956         block_info             *succ_info;
957         block_info             *info;
958         ir_node                *succ;
959         ir_node                *value;
960         ir_node                *expr;
961         size_t                  size;
962         ir_valueset_iterator_t  iter;
963         int                     n_succ;
964
965         /* filter blocks from topological walker */
966         if (! is_Block(block))
967                 return;
968
969         /* the end block has no successor */
970         if (block == env->end_block)
971                 return;
972
973         info = get_block_info(block);
974         /* track changes */
975         size = ir_valueset_size(info->antic_in);
976         n_succ = get_Block_n_cfg_outs(block);
977
978         /* add exp_gen */
979         if (env->first_iter) {
980 #if IGNORE_INF_LOOPS
981                 /* keep antic_in of infinite loops empty */
982                 if (! is_in_infinite_loop(block)) {
983                         foreach_valueset(info->exp_gen, value, expr, iter) {
984                                 ir_valueset_insert(info->antic_in, value, expr);
985                         }
986                 }
987 #else
988                 foreach_valueset(info->exp_gen, value, expr, iter) {
989                         ir_valueset_insert(info->antic_in, value, expr);
990                 }
991 #endif
992         }
993
994         /* successor might have phi nodes */
995         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
996                 succ      = get_Block_cfg_out(block, 0);
997                 int pos   = get_Block_cfgpred_pos(succ, block);
998                 succ_info = get_block_info(succ);
999
1000                 /* initialize translated set */
1001                 if (env->first_iter) {
1002                         info->trans = XMALLOC(ir_nodehashmap_t);
1003                         ir_nodehashmap_init(info->trans);
1004                 }
1005
1006                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1007                         ir_node *trans = get_translated(block, expr);
1008                         ir_node *trans_value;
1009                         ir_node *represent;
1010
1011                         if (trans == NULL)
1012                                 trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
1013
1014                         /* create new value if necessary */
1015                         trans_value = identify_or_remember(trans);
1016
1017                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1018
1019                         /* On value change (phi present) we need the translated node
1020                            to represent the new value for possible further translation. */
1021                         if (value != trans_value)
1022                                 represent = trans;
1023                         else
1024                                 represent = expr;
1025
1026                         if (is_clean_in_block(expr, block, info->antic_in)) {
1027 #if NO_INF_LOOPS
1028                                 /* Prevent information flow over the backedge of endless loops. */
1029                                 if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
1030                                         ir_valueset_replace(info->antic_in, trans_value, represent);
1031                                 }
1032 #else
1033                                 ir_valueset_replace(info->antic_in, trans_value, represent);
1034 #endif
1035                         }
1036                         set_translated(info->trans, expr, represent);
1037                 }
1038
1039         } else if (n_succ > 1) {
1040                 int         i;
1041                 ir_node    *common     = NULL;
1042                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1043                 block_info *succ0_info = get_block_info(succ0);
1044
1045                 /* disjoint of antic_ins */
1046                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1047                         /* iterate over remaining successors */
1048                         for (i = 1; i < n_succ; ++i) {
1049                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1050                                 block_info *succ_info = get_block_info(succ);
1051
1052                                 /* value in antic_in? */
1053                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1054                                 if (common == NULL)
1055                                         break;
1056                         }
1057
1058                         if (common && is_clean_in_block(expr, block, info->antic_in))
1059                                 ir_valueset_replace(info->antic_in, value, expr);
1060                 }
1061         }
1062
1063
1064         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1065
1066         if (size != ir_valueset_size(info->antic_in))
1067                 env->changes |= 1;
1068 }
1069
1070 /* --------------------------------------------------------
1071  * Main algorithm Avail_out
1072  * --------------------------------------------------------
1073  */
1074
1075 /**
1076  * Computes Avail_out(block):
1077  *
1078  * Avail_in(block)  = Avail_out(dom(block))
1079  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1080  *
1081  * Precondition:
1082  *  This function must be called in the top-down topological order:
1083  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1084  *
1085  * @param block   the block
1086  * @param ctx     walker context
1087  */
1088 static void compute_avail_top_down(ir_node *block, void *ctx)
1089 {
1090         pre_env    *env   = (pre_env*)ctx;
1091         block_info *info;
1092
1093         if (block == env->end_block)
1094                 return;
1095
1096         info  = get_block_info(block);
1097
1098         /* Add all nodes from the immediate dominator.
1099            This ensures that avail_out contains the leader. */
1100         if (block != env->start_block) {
1101                 ir_node                *dom_block = get_Block_idom(block);
1102                 block_info             *dom_info  = get_block_info(dom_block);
1103                 ir_node                *value;
1104                 ir_node                *expr;
1105                 ir_valueset_iterator_t  iter;
1106
1107                 foreach_valueset(dom_info->avail_out, value, expr, iter)
1108                         /* replace: use the leader from dominator, not local exp_gen */
1109                         ir_valueset_replace(info->avail_out, value, expr);
1110         }
1111
1112         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1113 }
1114
1115 /* --------------------------------------------------------
1116  * Main algorithm redundancy detection
1117  * --------------------------------------------------------
1118  */
1119
1120 /**
1121  * Returns a valid mode if the value of expr is a partially redundant value.
1122  *
1123  * @param block   the block
1124  * @param expr    the expression
1125  *
1126  * @return mode of the expression if it is partially redundant else NULL
1127  */
1128 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1129 {
1130         ir_node *first_avail         = NULL;
1131         int      pos;
1132         int      arity               = get_irn_arity(block);
1133         int      fully_redundant     = 1;
1134         int      partially_redundant = 0;
1135         ir_mode *mode                = NULL;
1136
1137         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1138
1139         /* for each predecessor blocks */
1140         for (pos = 0; pos < arity; ++pos) {
1141                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1142                 block_info *pred_info;
1143                 ir_node    *trans_expr;
1144                 ir_node    *trans_value;
1145                 ir_node    *avail_expr;
1146
1147                 pred_info  = get_block_info(pred_block);
1148                 trans_expr = get_translated(pred_block, expr);
1149                 trans_value = identify(trans_expr);
1150
1151                 if (is_Const(trans_expr))
1152                         avail_expr = trans_expr;
1153                 else
1154                         avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1155
1156                 /* value might be available through a not yet existing constant */
1157                 if (avail_expr == NULL && is_Const(trans_expr)) {
1158                         /* limit range of new constants */
1159                         ir_mode   *cmode = get_irn_mode(trans_expr);
1160                         ir_tarval *upper = new_tarval_from_long(127, cmode);
1161                         ir_tarval *lower = new_tarval_from_long(-127, cmode);
1162                         ir_tarval *c     = get_Const_tarval(trans_expr);
1163
1164                         /* tarval within range? */
1165                         if (tarval_cmp(lower, c) == ir_relation_less_equal &&
1166                                 tarval_cmp(c, upper) == ir_relation_less_equal) {
1167                                 avail_expr = trans_expr;
1168                         } else {
1169                                 avail_expr = NULL;
1170                         }
1171             }
1172
1173                 DB((dbg, LEVEL_3, "avail_expr %+F  trans_expr %+F\n", avail_expr, trans_expr));
1174
1175                 if (avail_expr == NULL) {
1176                         pred_info->avail = trans_expr;
1177                         pred_info->found = 0;
1178                         fully_redundant  = 0;
1179                 } else {
1180                         /* expr is available, use the leader */
1181                         pred_info->avail    = avail_expr;
1182                         pred_info->found    = 1;
1183                         mode                = get_irn_mode(avail_expr);
1184                         partially_redundant = 1;
1185
1186                         if (first_avail == NULL)
1187                                 first_avail = avail_expr;
1188                         else if (first_avail != avail_expr)
1189                                 /* Multiple different expressions are available,
1190                                    This is why we need no cut over avail_out sets. */
1191                                 fully_redundant = 0;
1192
1193                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1194                 }
1195         }
1196
1197         /* If it is not the same value already existing along every predecessor
1198        and it is defined by some predecessor then it is partially redundant. */
1199         if (! partially_redundant || fully_redundant)
1200                 return NULL;
1201         return mode;
1202 }
1203
1204 /**
1205  * Updates the new_set of a block by adding the new_set of
1206  * the immediate dominating block.
1207  *
1208  * @param  the block
1209  */
1210 static void update_new_set(ir_node *block, ir_node *idom)
1211 {
1212         ir_node                *value;
1213         ir_node                *expr;
1214         ir_valueset_iterator_t  iter;
1215         block_info             *curr_info = get_block_info(block);
1216         block_info             *idom_info = get_block_info(idom);
1217         int                     updated   = 0;
1218
1219         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1220         foreach_valueset(idom_info->new_set, value, expr, iter) {
1221                 /* inherit new_set from immediate dominator */
1222                 ir_valueset_insert(curr_info->new_set, value, expr);
1223                 /* replace in avail_out */
1224                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1225         }
1226 #ifdef DEBUG_libfirm
1227         if (updated)
1228                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1229 #endif
1230 } /* update_new_set */
1231
1232 /**
1233  * Checks if hoisting irn is greedy.
1234  * Greedy hoisting means that there are non partially redundant nodes
1235  * hoisted. This happens if a partially redundant node has
1236  * non redundant predecessors.
1237  */
1238 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1239 {
1240         int block_arity = get_irn_arity(block);
1241         int arity = get_irn_arity(irn);
1242         int pos, i;
1243         block_info *info = get_block_info(block);
1244
1245         /* As long as the predecessor values are available in all predecessor blocks,
1246            we can hoist this value. */
1247         for (pos = 0; pos < block_arity; ++pos) {
1248                 ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1249                 block_info *pred_info  = get_block_info(pred_block);
1250
1251                 for (i = 0; i < arity; ++i) {
1252                         ir_node *pred     = get_irn_n(irn, i);
1253                         ir_node *value;
1254                         ir_node *leader;
1255                         ir_node *trans;
1256                         ir_node *trans_val;
1257                         ir_node *avail;
1258
1259 #if MIN_CUT
1260                         /* Very conservative min cut. Phi might only have 1 user. */
1261                         if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
1262                                 return 1;
1263 #endif
1264
1265                         if (is_Phi(pred) && get_nodes_block(pred) == block)
1266                                 continue;
1267
1268                         DB((dbg, LEVEL_3, "pred %+F\n", pred));
1269                         value = identify(pred);
1270                         leader = ir_valueset_lookup(info->antic_in, value);
1271                         if (! leader)
1272                                 leader = pred;
1273                         DB((dbg, LEVEL_3, "lead %+F\n", leader));
1274                         trans   = get_translated(pred_block, leader);
1275                         if (! trans)
1276                                 trans = pred;
1277                         DB((dbg, LEVEL_3, "trans %+F\n", trans));
1278
1279                         trans_val = identify(trans);
1280                         DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1281
1282                         if (is_Const(trans_val) || is_SymConst(trans_val)) {
1283                                 /* existing constant */
1284                                 if (get_irn_idx(trans_val) < environ->last_idx) {
1285                                         continue;
1286                                 } else {
1287                                         /* limit range of new constants */
1288                                         ir_mode   *cmode = get_irn_mode(trans);
1289                                         ir_tarval *upper = new_tarval_from_long(128, cmode);
1290                                         ir_tarval *lower = new_tarval_from_long(-128, cmode);
1291                                         ir_tarval *c     = get_Const_tarval(trans);
1292
1293                                         /* tarval within range? */
1294                                         if (tarval_cmp(lower, c) == ir_relation_less &&
1295                                                 tarval_cmp(c, upper) == ir_relation_less) {
1296                                                 continue;
1297                                         } else {
1298                                                 return 1;
1299                                         }
1300                                 }
1301                         }
1302
1303                         /* */
1304                         if (is_irn_constlike(trans_val))
1305                                 continue;
1306
1307                         avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1308
1309                         DB((dbg, LEVEL_3, "avail %+F\n", avail));
1310                         if (! avail)
1311                                 return 1;
1312 #if MIN_CUT
1313                         /* only optimize if predecessors have been optimized */
1314                         if (ir_valueset_lookup(info->antic_done, value) == NULL)
1315                                 return 1;
1316 #endif
1317                 }
1318         }
1319         return 0;
1320 }
1321
1322 /**
1323  * Perform insertion of partially redundant values.
1324  * For every block node, do the following:
1325  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1326  * If the block has multiple predecessors,
1327  *     2a. Iterate over the ANTIC expressions for the block to see if
1328  *         any of them are partially redundant.
1329  *     2b. If so, insert them into the necessary predecessors to make
1330  *         the expression fully redundant.
1331  *     2c. Insert a new Phi merging the values of the predecessors.
1332  *     2d. Insert the new Phi, and the new expressions, into the
1333  *         NEW_SETS set.
1334  *
1335  * @param block  the block
1336  * @param ctx    the walker environment
1337  */
1338 static void insert_nodes_walker(ir_node *block, void *ctx)
1339 {
1340         pre_env                *env    = (pre_env*)ctx;
1341         int                     arity  = get_irn_arity(block);
1342         ir_node                *value;
1343         ir_node                *expr;
1344         block_info             *info;
1345         ir_node                *idom;
1346         int                     pos;
1347         ir_valueset_iterator_t  iter;
1348
1349         /* only blocks */
1350         if (! is_Block(block))
1351                 return;
1352
1353         /* ensure that even the start block has a new_set */
1354         info = get_block_info(block);
1355         if (info->new_set)
1356                 ir_valueset_del(info->new_set);
1357         info->new_set = ir_valueset_new(16);
1358
1359         if (block == env->start_block)
1360                 return;
1361
1362         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1363
1364         idom = get_Block_idom(block);
1365         update_new_set(block, idom);
1366
1367         /* process only path joining blocks */
1368         if (arity < 2) {
1369                 return;
1370         }
1371
1372         /* This is the main reason antic_in is preverred over antic_out;
1373            we may iterate over every anticipated value first and not
1374            over the predecessor blocks. */
1375         foreach_valueset(info->antic_in, value, expr, iter) {
1376                 ir_mode  *mode;
1377                 ir_node  *phi;
1378                 ir_node **phi_in;
1379
1380                 /* already done? */
1381                 if (ir_valueset_lookup(info->antic_done, value))
1382                         continue;
1383
1384                 /* filter phi nodes from antic_in */
1385                 if (is_Phi(expr))
1386                         continue;
1387
1388                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1389
1390                 /* A value computed in the dominator is totally redundant.
1391                    Hence we have nothing to insert. */
1392                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1393                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1394                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1395
1396                         ir_valueset_insert(info->antic_done, value, expr);
1397                         continue;
1398                 }
1399
1400                 if (is_hoisting_greedy(expr, block)) {
1401                         DB((dbg, LEVEL_2, "greedy\n"));
1402                         continue;
1403                 }
1404
1405                 mode = is_partially_redundant(block, expr, value);
1406                 if (mode == NULL)
1407                         continue;
1408
1409 #if LOADS || DIVMODS
1410                 /* save old mode_M phis to remove keepalive edges later */
1411                 if (is_memop(expr)) {
1412                         ir_node *mem = get_memop_mem(expr);
1413                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1414                                 ir_nodeset_insert(env->keeps, mem);
1415                         }
1416                 }
1417 #endif
1418
1419 #ifdef DEBUG_libfirm
1420                 if (! is_Proj(expr)) {
1421                         if (env->first_iter)
1422                                 inc_stats(gvnpre_stats->first_iter_found);
1423                         inc_stats(gvnpre_stats->partially);
1424                 }
1425                 if (is_Load(expr) || is_Store(expr))
1426                         inc_stats(gvnpre_stats->loads);
1427                 else if (is_Div(expr) || is_Mod(expr))
1428                         inc_stats(gvnpre_stats->divmods);
1429 #endif
1430
1431                 phi_in = XMALLOCN(ir_node *, arity);
1432
1433                 /* for each predecessor block */
1434                 for (pos = 0; pos < arity; ++pos) {
1435                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1436                         block_info *pred_info;
1437
1438                         pred_info = get_block_info(pred_block);
1439
1440                         if (! pred_info->found) {
1441                                 int i;
1442                                 int node_arity = get_irn_arity(expr);
1443                                 ir_node **in = XMALLOCNZ(ir_node *, node_arity);
1444                                 ir_node *trans;
1445                                 ir_node *new_value, *new_value2;
1446                                 ir_node *target_block = pred_block;
1447
1448                                 for (i = 0; i < node_arity; ++i) {
1449                                         ir_node *pred     = get_irn_n(expr, i);
1450                                         ir_node *value    = identify(pred);
1451                                         ir_node *leader;
1452                                         ir_node *trans;
1453                                         ir_node *trans_val;
1454                                         ir_node *avail;
1455
1456                                         /* transform knowledge over the predecessor from
1457                                            anti-leader world into leader world. */
1458
1459                                         DB((dbg, LEVEL_3, "pred %+F\n", pred));
1460                                         value = identify(pred);
1461
1462                                         /* get leader for pred to lookup its translated value */
1463                                         leader = ir_valueset_lookup(info->antic_in, value);
1464                                         if (! leader)
1465                                                 leader = pred;
1466                                         DB((dbg, LEVEL_3, "lead %+F\n", leader));
1467
1468                                         trans   = get_translated(pred_block, leader);
1469                                         if (!trans)
1470                                                 trans = pred;
1471                                         DB((dbg, LEVEL_3, "trans %+F\n", trans));
1472
1473                                         /* in case of phi, we are done */
1474                                         if (is_Phi(pred) && get_nodes_block(pred) == block) {
1475                                                 in[i] = trans;
1476                                                 continue;
1477                                         }
1478
1479                                         trans_val = identify(trans);
1480                                         DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1481
1482                                         /* constants are always available but not in avail set */
1483                                         if (is_irn_constlike(trans_val)) {
1484                                                 in[i] = trans;
1485                                                 continue;
1486                                         }
1487
1488                                         /* use the leader
1489                                            In case of loads we need to make sure the hoisted
1490                                            loads are found despite their unique value. */
1491                                         avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1492                                         DB((dbg, LEVEL_3, "avail %+F\n", avail));
1493
1494                                         assert(avail && "predecessor has to be available");
1495                                         in[i] = avail;
1496                                 }
1497
1498                                 if (is_Proj(expr))
1499                                         target_block = get_nodes_block(in[0]);
1500
1501                                 /* Copy node to represent the new value.
1502                                    We use translated nodes as value representatives only.
1503                                    They have anti leaders as predecessors, not leaders!
1504                                    So we have to create a new node using leaders. */
1505                                 trans = new_ir_node(
1506                                         get_irn_dbg_info(expr),
1507                                         environ->graph,
1508                                         target_block,
1509                                         get_irn_op(expr),
1510                                         get_irn_mode(expr),
1511                                         get_irn_arity(expr),
1512                                         in);
1513                                 free(in);
1514                                 /* We need the attribute copy here, because the Hash value of a
1515                                    node might depend on it. */
1516                                 copy_node_attr(environ->graph, expr, trans);
1517
1518                                 /* value is now available in target block through trans
1519                                    insert (not replace) because it has not been available */
1520                                 new_value = identify_or_remember(trans);
1521                                 ir_valueset_insert(pred_info->avail_out, new_value, trans);
1522                                 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
1523
1524                                 new_value2 = identify(get_translated(pred_block, expr));
1525                                 ir_valueset_insert(pred_info->avail_out, new_value2, trans);
1526                                 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
1527
1528                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
1529
1530                                 phi_in[pos] = trans;
1531                         } else {
1532                                 /* value available */
1533                                 phi_in[pos] = pred_info->avail;
1534                         }
1535                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1536                 }
1537
1538                 /* We do not connect tuples as they will be connected automatically
1539                    by the corresponding projections. */
1540                 if (get_irn_mode(expr) != mode_T) {
1541
1542                         phi = new_r_Phi(block, arity, phi_in, mode);
1543                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1544
1545                         /* This value is now available through the new phi.
1546                            insert || replace in avail_out */
1547                         ir_valueset_replace(info->avail_out, value, phi);
1548                         ir_valueset_insert(info->new_set, value, phi);
1549                 }
1550                 free(phi_in);
1551
1552                 /* already optimized this value in this block */
1553                 ir_valueset_insert(info->antic_done, value, expr);
1554                 env->changes |= 1;
1555         }
1556 }
1557
1558 #if HOIST_HIGH
1559 static void update_new_set_walker(ir_node *block, void *ctx)
1560 {
1561         pre_env *env = (pre_env*)ctx;
1562
1563         if (! is_Block(block))
1564                 return;
1565         if (block == env->start_block)
1566                 return;
1567
1568         update_new_set(block, get_Block_idom(block));
1569 }
1570
1571 /**
1572  * Domtree block walker to insert nodes with dying operands
1573  * into the highest possible block whilst still being anticipated.
1574  */
1575 static void hoist_high(ir_node *block, void *ctx)
1576 {
1577         pre_env                *env        = (pre_env*)ctx;
1578         block_info             *curr_info;
1579         ir_valueset_iterator_t  iter;
1580         ir_node                *expr;
1581         ir_node                *value;
1582         int                     arity      = get_irn_arity(block);
1583
1584         if (! is_Block(block))
1585                 return;
1586
1587         curr_info = get_block_info(block);
1588
1589         if (curr_info->new_set)
1590                 ir_valueset_del(curr_info->new_set);
1591         curr_info->new_set = ir_valueset_new(16);
1592
1593         if (block == env->start_block)
1594                 return;
1595
1596         if (arity < 2)
1597                 return;
1598
1599         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1600
1601         /* foreach entry optimized by insert node phase */
1602         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1603                 int pos;
1604
1605                 /* TODO currently we cannot handle load and their projections */
1606                 if (is_memop(expr) || is_Proj(expr))
1607                         continue;
1608
1609                 DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
1610
1611                 /* visit hoisted expressions */
1612                 for (pos = 0; pos < arity; ++pos) {
1613                         /* standard target is predecessor block */
1614                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1615                         block_info *pred_info  = get_block_info(target);
1616                         ir_node    *avail;
1617                         ir_node    *new_target;
1618                         ir_node    *trans_expr;
1619                         ir_node    *trans_value;
1620                         ir_node    *dom;
1621                         int         avail_arity;
1622                         int         i;
1623                         unsigned    nest_depth;
1624                         block_info *dom_info;
1625
1626                         /* get phi translated value */
1627                         trans_expr  = get_translated(target, expr);
1628                         trans_value = identify(trans_expr);
1629                         avail       = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1630
1631                         /* get the used expr on this path */
1632
1633                         /* TODO when does this happen? */
1634                         if (avail == NULL)
1635                                 continue;
1636
1637                         avail_arity = get_irn_arity(avail);
1638                         value = identify(avail);
1639
1640                         /* anticipation border */
1641                         new_target  = NULL;
1642                         nest_depth  = get_loop_depth(get_irn_loop(target));
1643
1644                         /* Either push the hoisted nodes up their path,
1645                            or try to put them directly into their common dominator. */
1646 #if COMMON_DOM
1647                         /* By using block (instead of target) as initial block,
1648                            we only allow hoisting into a common block of
1649                            both predecessor blocks. */
1650                         dom         = block;
1651 #else
1652                         dom         = target;
1653 #endif
1654
1655                         while (dom && dom != get_Block_idom(block)) {
1656
1657                                 dom = get_Block_idom(dom);
1658                                 dom_info = get_block_info(dom);
1659                                 DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
1660
1661                                 /* TODO Being in antic_in means hoistable above block,
1662                                    but we need 'hoistable into block'.
1663                                    This could be achieved by a flag for each valueset pair,
1664                                    being set during antic computation. */
1665
1666                                 /* check if available node ist still anticipated and clean */
1667                                 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1668                                         DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
1669                                         break;
1670                                 }
1671
1672                                 nest_depth = get_loop_depth(get_irn_loop(dom));
1673
1674                                 /* do not hoist into loops */
1675                                 if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
1676                                         DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
1677                                         /* not a suitable location */
1678                                         continue;
1679                                 }
1680
1681                                 /* check if operands die */
1682
1683                                 /* check for uses on current path */
1684                                 for (i = 0; i < avail_arity; i++) {
1685                                         ir_node   *pred       = get_irn_n(avail, i);
1686                                         ir_node   *pred_value = identify(pred);
1687
1688                                         if (dom == NULL)
1689                                                 break;
1690
1691                                         DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
1692
1693                                         if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1694                                                 DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
1695                                                 dom = NULL;
1696                                                 break;
1697                                         }
1698
1699                                         /* check every successor */
1700                                         foreach_out_edge(pred, edge) {
1701                                                 ir_node *succ = get_edge_src_irn(edge);
1702                                                 DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
1703
1704                                                 /* check only successors on current path to end */
1705                                                 if (block_dominates(dom, get_nodes_block(succ))) {
1706                                                         ir_node *succ_value = identify(succ);
1707
1708                                                         /* Do we have another user than avail?
1709                                                            Then predecessor is not dead after removal of avail. */
1710                                                         if (succ_value != value) {
1711                                                                 DB((dbg, LEVEL_4, "still used in %+F\n", succ));
1712                                                                 dom = NULL;
1713                                                                 break;
1714                                                         }
1715                                                 }
1716                                         }
1717                                 }
1718                                 if (dom)
1719                                         new_target = dom;
1720
1721 #if COMMON_DOM
1722                                 /* only try common dominator */
1723                                 break;
1724 #endif
1725                         }
1726
1727                         /* put node into new target block */
1728                         if (new_target) {
1729                                 block_info *target_info = get_block_info(new_target);
1730                                 int         nn_arity    = get_irn_arity(avail);
1731                                 ir_node   **in          = XMALLOCN(ir_node *, nn_arity);
1732                                 ir_node    *nn;
1733                                 int         i;
1734
1735                                 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1736                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1737
1738                                 for (i = 0; i < nn_arity; ++i) {
1739                                         ir_node *pred       = get_irn_n(avail, i);
1740                                         ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
1741                                         assert(avail_pred);
1742                                         in[i] = avail_pred;
1743                                 }
1744                                 nn = new_ir_node(
1745                                         get_irn_dbg_info(avail),
1746                                         environ->graph,
1747                                         new_target,
1748                                         get_irn_op(avail),
1749                                         get_irn_mode(avail),
1750                                         nn_arity,
1751                                         in);
1752                                 free(in);
1753
1754                                 identify_or_remember(nn);
1755                                 /* TODO Nodes are inserted into a dominating block and should
1756                                    be available from this point on. Currently we do not push
1757                                    the availability information through during the walk. */
1758                                 ir_valueset_insert(target_info->new_set, value, nn);
1759                                 ir_valueset_insert(target_info->avail_out, value, nn);
1760                         }
1761                 }
1762         }
1763 }
1764 #endif
1765
1766 /* --------------------------------------------------------
1767  * Elimination of fully redundant nodes
1768  * --------------------------------------------------------
1769  */
1770
1771 /**
1772  * Walker which finds redundant nodes using avail_out sets
1773  * and exchanges them for existing ones.
1774  * We cannot change the graph here as this would affect
1775  * the hash values of the nodes.
1776  *
1777  * @param irn  the node
1778  * @param ctx  the walker environment
1779  */
1780 static void eliminate(ir_node *irn, void *ctx)
1781 {
1782         pre_env *env = (pre_env*)ctx;
1783
1784         if (! is_Block(irn)) {
1785                 ir_node    *block = get_nodes_block(irn);
1786                 block_info *info  = get_block_info(block);
1787                 ir_node    *value = identify(irn);
1788
1789                 if (value != NULL) {
1790                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1791                         DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
1792
1793                         if (expr != NULL && expr != irn) {
1794                                 elim_pair *p = OALLOC(env->obst, elim_pair);
1795
1796                                 p->old_node = irn;
1797                                 p->new_node = expr;
1798                                 p->next     = env->pairs;
1799                                 if (get_irn_idx(expr) > env->last_idx)
1800                                         p->reason = FS_OPT_GVN_PARTLY;
1801                                 else
1802                                         p->reason = FS_OPT_GVN_FULLY;
1803                                 env->pairs = p;
1804                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1805                         }
1806                 }
1807         }
1808 }  /* eliminate */
1809
1810 /**
1811  * Do all the recorded changes and optimize
1812  * newly created Phi's.
1813  *
1814  * @param pairs  list of elimination pairs
1815  */
1816 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1817 {
1818         elim_pair             *p;
1819         ir_node               *end    = environ->end_node;
1820
1821         for (p = pairs; p != NULL; p = p->next) {
1822                 /* might be already changed */
1823                 p->new_node = skip_Id(p->new_node);
1824
1825                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1826
1827                 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1828                  * which we can optimize here */
1829                 if (is_Phi(p->new_node)) {
1830                         int      i;
1831                         ir_node *res = NULL;
1832
1833                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1834                                 ir_node *pred = get_irn_n(p->new_node, i);
1835
1836                                 if (pred != p->old_node) {
1837                                         if (res) {
1838                                                 res = NULL;
1839                                                 break;
1840                                         }
1841                                         res = pred;
1842                                 }
1843                         }
1844                         if (res) {
1845                                 exchange(p->new_node, res);
1846                                 p->new_node = res;
1847                         }
1848                 }
1849                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1850
1851                 exchange(p->old_node, p->new_node);
1852         }
1853
1854         /* remove keep alive edges of unused mode_M phis */
1855         foreach_ir_nodeset(keeps, m_phi, iter) {
1856                 remove_End_keepalive(end, m_phi);
1857         }
1858 }  /* eliminate_nodes */
1859
1860
1861 /* --------------------------------------------------------
1862  * GVN PRE pass
1863  * --------------------------------------------------------
1864  */
1865
1866 /**
1867  * Gvn_Pre algorithm.
1868  *
1869  * @param irg   the graph
1870  * @param env   the environment
1871  */
1872 static void gvn_pre(ir_graph *irg, pre_env *env)
1873 {
1874         unsigned              antic_iter;
1875         unsigned              insert_iter;
1876
1877         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1878
1879         /* allocate block info */
1880         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1881
1882         ir_nodehashmap_init(&value_map);
1883
1884         /* generate exp_gen */
1885         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1886         dump_all_expgen_sets(env->list);
1887
1888         /* compute the avail_out sets for all blocks */
1889         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1890
1891         /* compute the anticipated value sets for all blocks */
1892         antic_iter      = 0;
1893         env->first_iter = 1;
1894
1895         env->iteration = 1;
1896         /* antic_in passes */
1897         do {
1898                 ++antic_iter;
1899                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1900                 env->changes = 0;
1901                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1902                 env->first_iter = 0;
1903                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1904                 env->iteration ++;
1905         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1906
1907         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1908
1909         ir_nodeset_init(env->keeps);
1910         insert_iter       = 0;
1911         env->first_iter   = 1;
1912         /* compute redundant expressions */
1913         do {
1914                 ++insert_iter;
1915                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1916                 env->changes = 0;
1917                 /* TODO topologically top down would be better; fewer iterations. */
1918                 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
1919                 env->first_iter = 0;
1920                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1921         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1922         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1923
1924 #if HOIST_HIGH
1925         /* An attempt to reduce lifetimes by hoisting already hoisted values
1926            even higher if their operands die. */
1927         dom_tree_walk_irg(irg, hoist_high, NULL, env);
1928         /* update avail_out for elimination */
1929         dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
1930 #endif
1931
1932         /* Deactivate edges to prevent intelligent removal of nodes,
1933            or else we will get deleted nodes which we try to exchange. */
1934         edges_deactivate(environ->graph);
1935
1936         /* eliminate nodes */
1937         irg_walk_graph(irg, NULL, eliminate, env);
1938         eliminate_nodes(env->pairs, env->keeps);
1939
1940         ir_nodeset_destroy(env->keeps);
1941 }
1942
1943 /**
1944  * Gvn_Pre pass for graph irg.
1945  *
1946  * @param irg   the graph
1947  */
1948 void do_gvn_pre(ir_graph *irg)
1949 {
1950         struct obstack        obst;
1951         pre_env               env;
1952         ir_nodeset_t          keeps;
1953         optimization_state_t  state;
1954         block_info           *block_info;
1955
1956         /* bads and unreachables cause too much trouble with dominance,
1957            loop info for endless loop detection,
1958            no critical edges is PRE precondition
1959          */
1960         assure_irg_properties(irg,
1961                 IR_GRAPH_PROPERTY_NO_BADS
1962                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
1963                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
1964                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1965                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1966                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
1967
1968         /* register a debug mask */
1969         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1970
1971         save_optimization_state(&state);
1972         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1973
1974         edges_activate(irg);
1975
1976         environ = &env;
1977         DEBUG_ONLY(init_stats();)
1978
1979         /* setup environment */
1980         obstack_init(&obst);
1981         env.graph        = irg;
1982         env.obst         = &obst;
1983         env.list         = NULL;
1984         env.start_block  = get_irg_start_block(irg);
1985         env.end_block    = get_irg_end_block(irg);
1986         env.end_node     = get_irg_end(irg);
1987         env.pairs        = NULL;
1988         env.keeps        = &keeps;
1989         env.last_idx     = get_irg_last_idx(irg);
1990
1991         /* Detect and set links of infinite loops to non-zero. */
1992         analyse_loops(irg);
1993
1994 #if OPTIMIZE_NODES
1995         new_identities(irg);
1996         env.value_table = irg->value_table;
1997         irg->value_table = NULL;
1998 #endif
1999
2000         /* Switch on GCSE. We need it to correctly compute
2001            the value of a node, which is independent from
2002            its block. */
2003         set_opt_global_cse(1);
2004         /* new_identities() */
2005         if (irg->value_table != NULL)
2006                 del_pset(irg->value_table);
2007         /* initially assumed nodes in pset are 512 */
2008         irg->value_table = new_pset(compare_gvn_identities, 512);
2009 #if OPTIMIZE_NODES
2010         env.gvnpre_values = irg->value_table;
2011 #endif
2012
2013         /* do GVN-PRE pass */
2014         gvn_pre(irg, &env);
2015         DEBUG_ONLY(print_stats();)
2016
2017         /* clean up: delete all sets */
2018         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2019                 free_block_info(block_info);
2020         }
2021
2022         DEBUG_ONLY(free_stats();)
2023         ir_nodehashmap_destroy(&value_map);
2024         obstack_free(&obst, NULL);
2025         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2026
2027         /* Pin the graph again.
2028            This is needed due to the use of set_opt_global_cse(1) */
2029         set_irg_pinned(irg, op_pin_state_pinned);
2030         restore_optimization_state(&state);
2031         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
2032
2033 #if OPTIMIZE_NODES
2034         irg->value_table = env.value_table;
2035         del_pset(irg->value_table);
2036         irg->value_table = env.gvnpre_values;
2037 #endif
2038
2039         /* TODO There seem to be optimizations that try to use the existing
2040            value_table. */
2041         new_identities(irg);
2042
2043         /* TODO assure nothing else breaks. */
2044         set_opt_global_cse(0);
2045         edges_activate(irg);
2046 }
2047
2048 /* Creates an ir_graph pass for do_gvn_pre. */
2049 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2050 {
2051         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
2052 }