08dcdf14f35b24666af0aa60d93ded3e52d3a057
[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Global Value Numbering Partial Redundancy Elimination
23  *          (VanDrunen Hosking 2004)
24  * @author  Michael Beck
25  * @brief
26  */
27 #include "config.h"
28
29 #include "debug.h"
30 #include "ircons.h"
31 #include "irdom.h"
32 #include "iredges.h"
33 #include "irflag.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
41 #include "irouts.h"
42 #include "irpass.h"
43 #include "valueset.h"
44 #include "irloop.h"
45
46 #include "irgraph_t.h"
47 #include "irnode_t.h"
48 #include "iropt_t.h"
49 #include "plist.h"
50
51 #define MAX_ANTIC_ITER 10
52 #define MAX_INSERT_ITER 3
53
54 #define PARTLY_ONLY 0
55 #define HOIST_HIGH 0
56 #define BETTER_GREED 0
57 #define LOADS 1
58 #define DIVMODS 0
59 #define NO_INF_LOOPS 0
60 #define NO_INF_LOOPS2 0
61
62 /** Additional info we need for every block. */
63 typedef struct block_info {
64         ir_valueset_t     *exp_gen;    /* contains this blocks clean expressions */
65         ir_valueset_t     *avail_out;  /* available values at block end */
66         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
67         ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert nodes phase*/
68         ir_valueset_t     *new_set;    /* new by hoisting made available values */
69         ir_nodehashmap_t  *trans;      /* contains translated nodes translated into block */
70         ir_node           *avail;      /* saves available node for insert node phase */
71         int                found;      /* saves kind of availability for insert_node phase */
72         ir_node           *block;      /* block of the block_info */
73         struct block_info *next;       /* links all instances for easy access */
74 } block_info;
75
76 /**
77  * A pair of nodes to be exchanged.
78  * We have to defer the exchange because our hash-sets cannot
79  * find an already replaced node.
80  */
81 typedef struct elim_pair {
82         ir_node *old_node;      /* node that will be replaced */
83         ir_node *new_node;      /* replacement for old_node */
84         struct elim_pair *next; /* links all instances for easy access */
85         int     reason;         /* reason for the replacement */
86 } elim_pair;
87
88 /** environment for the GVN-PRE algorithm */
89 typedef struct pre_env {
90         ir_graph       *graph;        /* current graph */
91         struct obstack *obst;         /* obstack to allocate on */
92         ir_node        *start_block;  /* start block of the current graph */
93         ir_node        *end_block;    /* end block of the current graph */
94         ir_node        *end_node;     /* end node of the current graph */
95         block_info     *list;         /* block_info list head */
96         elim_pair      *pairs;        /* elim_pair list head */
97         ir_nodeset_t   *keeps;        /* a list of to be removed phis to kill their keep alive edges */
98         unsigned        last_idx;     /* last node index of input graph */
99         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
100         char            first_iter;   /* non-zero for first fixed point iteration */
101 } pre_env;
102
103 static pre_env *environ;
104 /* custom GVN value map */
105 static ir_nodehashmap_t value_map;
106
107 /* debug module handle */
108 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
109
110 #ifdef DEBUG_libfirm
111
112 /* --------------------------------------------------------
113  * Statistics
114  * --------------------------------------------------------
115  */
116
117 typedef struct gvnpre_statistics {
118         int replaced;
119         int partially;
120         int fully;
121         int loads;
122         int divmods;
123         int hoist_high;
124         int first_iter_found;
125         int antic_iterations;
126         int insert_iterations;
127         int infinite_loops;
128 } gvnpre_statistics;
129
130 gvnpre_statistics *gvnpre_stats = NULL;
131
132 static void init_stats()
133 {
134         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
135 }
136
137 static void free_stats()
138 {
139         free(gvnpre_stats);
140         gvnpre_stats = NULL;
141 }
142
143 static void print_stats()
144 {
145         gvnpre_statistics *stats = gvnpre_stats;
146         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
147         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
148         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
149         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
150         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
151         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
152         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
153         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
154         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
155         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
156 }
157
158 #define set_stats(var, value) (var)=(value)
159 #define inc_stats(var)        ((var)+=1)
160
161 /* --------------------------------------------------------
162  * Dump valuesets
163  * --------------------------------------------------------
164  */
165
166 /**
167  * Dump a value set.
168  *
169  * @param set    the set to dump
170  * @param txt    a text to describe the set
171  * @param block  the owner block of the set
172  */
173 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
174 {
175         ir_valueset_iterator_t iter;
176         ir_node *value, *expr;
177         int i;
178
179         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
180         i = 0;
181         foreach_valueset(set, value, expr, iter) {
182                 if ((i & 3) == 3)
183                         DB((dbg, LEVEL_2, "\n"));
184                 if (value != expr)
185                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
186                 else
187                         DB((dbg, LEVEL_2, " %+F,", expr));
188                 ++i;
189         }
190         DB((dbg, LEVEL_2, "\n}\n"));
191 }  /* dump_value_set */
192
193 /**
194  * Dump all exp_gen value sets.
195  *
196  * @param list  the list of block infos to retrieve the sets from
197  */
198 static void dump_all_expgen_sets(block_info *list)
199 {
200         block_info *block_info;
201
202         for (block_info = list; block_info != NULL; block_info = block_info->next) {
203                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
204         }
205 }
206
207 #else
208
209 #define dump_all_expgen_sets(list)
210 #define dump_value_set(set, txt, block)
211
212 #endif /* DEBUG_libfirm */
213
214 /* --------------------------------------------------------
215  * GVN Functions
216  * --------------------------------------------------------
217  */
218
219 /**
220  * Compares node collisions in valuetable.
221  * Modified identities_cmp().
222  */
223 static int compare_gvn_identities(const void *elt, const void *key)
224 {
225         ir_node *a = (ir_node *)elt;
226         ir_node *b = (ir_node *)key;
227         int i, irn_arity_a;
228
229         if (a == b) return 0;
230
231         /* phi nodes kill predecessor values and are always different */
232         if (is_Phi(a) || is_Phi(b))
233                 return 1;
234
235         /* memops are not the same, even if we want optimize them
236            we have to take the order in account */
237         if (is_memop(a) || is_memop(b)) {
238                 /* Loads with the same predecessors are the same value;
239                    this should only happen after phi translation. */
240                 if (! is_Load(a) || ! is_Load(b))
241                         return 1;
242         }
243
244         if ((get_irn_op(a) != get_irn_op(b)) ||
245             (get_irn_mode(a) != get_irn_mode(b))) return 1;
246
247         /* compare if a's in and b's in are of equal length */
248         irn_arity_a = get_irn_arity(a);
249         if (irn_arity_a != get_irn_arity(b))
250                 return 1;
251
252         /* blocks are never the same */
253         if (is_Block(a) || is_Block(b))
254                 return 1;
255
256         /* should only be used with gcse enabled */
257         assert(get_opt_global_cse());
258
259         /* compare a->in[0..ins] with b->in[0..ins] */
260         for (i = 0; i < irn_arity_a; ++i) {
261                 ir_node *pred_a = get_irn_n(a, i);
262                 ir_node *pred_b = get_irn_n(b, i);
263                 if (pred_a != pred_b) {
264                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
265                                 return 1;
266                 }
267         }
268
269         /*
270          * here, we already now that the nodes are identical except their
271          * attributes
272          */
273         if (a->op->ops.node_cmp_attr)
274                 return a->op->ops.node_cmp_attr(a, b);
275
276         return 0;
277 }
278
279 /**
280  * Identify does a lookup in the GVN valuetable.
281  * To be used when no new GVN values are to be created.
282  *
283  * @param e  a node representing an expression
284  * @return a node representing the value
285  */
286 static ir_node *identify(ir_node *irn)
287 {
288         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
289         if (value)
290                 return value;
291         /* irn represents a new value, so return the leader */
292         return identify_remember(irn);
293 }
294
295 /**
296  * remember() adds node irn to the GVN valuetable.
297  * Identify_remember only identifies values of nodes with the
298  * same predecessor nodes (not values). By creating a node from the predecessor
299  * values/leaders, a true valuetree is built. Phis kill their predecessor value,
300  * so no circular dependencies need to be resolved.
301  *
302  * TODO Improvement:
303  *      Maybe this could be implemented with a custom node hash that takes
304  *      phi nodes and true values (instead of predecessors) into account,
305  *      resulting in value numbers.
306  * TODO This unnecessarily also handles nodes like calls, which are never equal.
307  *
308  * @param irn  a node representing an expression
309  * @return     the value of the expression
310  */
311 static ir_node *remember(ir_node *irn)
312 {
313         int       arity   = get_irn_arity(irn);
314         int       i;
315         int       changed = 0;
316         ir_node **in      = XMALLOCN(ir_node *, arity);
317         ir_node  *value;
318
319         for (i = 0; i < arity; ++i) {
320                 ir_node *pred       = get_irn_n(irn, i);
321                 /* value and leader at the same time */
322                 ir_node *pred_value = identify(pred);
323
324                 /* phi will be translated anyway, so kill the predecessor values
325                    this also prevents circular dependencies */
326                 if (is_Phi(pred)) {
327                         /* every phi represents its own value */
328                         in[i] = pred;
329                         continue;
330                 }
331
332                 /* predecessor is not its value representation/the leader */
333                 if (pred != pred_value)
334                         changed = 1;
335                 in[i] = pred_value;
336         }
337
338         /* do not create new value for loads or else it will not
339            be found in avail_out during insert phase */
340         if (changed && ! is_Load(irn)) {
341                 /* create representative for */
342                 ir_node *nn = new_ir_node(
343                         get_irn_dbg_info(irn),
344                         get_irn_irg(irn),
345                         get_nodes_block(irn),
346                         get_irn_op(irn),
347                         get_irn_mode(irn),
348                         get_irn_arity(irn),
349                         in);
350                 copy_node_attr(environ->graph, irn, nn);
351
352                 /* now the value can be determined because the
353                    predecessors are the leaders */
354                 value = identify_remember(nn);
355         } else {
356                 value = identify_remember(irn);
357         }
358         free(in);
359
360         DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
361         ir_nodehashmap_insert(&value_map, irn, value);
362
363         return value;
364 }
365
366 /** When the value map has been built we may lookup expressions
367  *  and remember them if new.
368  */
369 static ir_node *identify_or_remember(ir_node *irn)
370 {
371         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
372         if (value)
373                 return value;
374         else
375                 return remember(irn);
376 }
377
378 /* --------------------------------------------------------
379  * Block info
380  * --------------------------------------------------------
381  */
382
383 /**
384  * Allocate block info for block block.
385  *
386  * @param block   the block
387  * @param env     the environment
388  */
389 static void alloc_block_info(ir_node *block, pre_env *env)
390 {
391         block_info *info = OALLOC(env->obst, block_info);
392
393         set_irn_link(block, info);
394         info->exp_gen    = ir_valueset_new(16);
395         info->avail_out  = ir_valueset_new(16);
396         info->antic_in   = ir_valueset_new(16);
397         info->antic_done = ir_valueset_new(16);
398         info->trans = XMALLOC(ir_nodehashmap_t);
399         ir_nodehashmap_init(info->trans);
400
401         info->new_set = NULL;
402         info->avail   = NULL;
403         info->block   = block;
404         info->found   = 1;
405
406         info->next = env->list;
407         env->list  = info;
408 }  /* alloc_block_info */
409
410 static void free_block_info(block_info *block_info)
411 {
412         ir_valueset_del(block_info->exp_gen);
413         ir_valueset_del(block_info->avail_out);
414         ir_valueset_del(block_info->antic_in);
415         if (block_info->trans) {
416                 ir_nodehashmap_destroy(block_info->trans);
417                 free(block_info->trans);
418         }
419         if (block_info->new_set)
420                 ir_valueset_del(block_info->new_set);
421 }
422
423 /**
424  * Bottom up walker that ensures that every block gets a block info.
425  *
426  * @param irn   the node
427  * @param ctx   the environment
428  */
429 static void block_info_walker(ir_node *irn, void *ctx)
430 {
431         if (is_Block(irn)) {
432                 pre_env *env = (pre_env*)ctx;
433                 alloc_block_info(irn, env);
434         }
435 }
436
437 /**
438  * Returns the block info of a block.
439  */
440 static block_info *get_block_info(ir_node *block)
441 {
442         return (block_info*)get_irn_link(block);
443 }
444
445 /* --------------------------------------------------------
446  * Infinite loop analysis
447  * --------------------------------------------------------
448  */
449
450 /**
451  * Walker to set block marks and loop links to 0.
452  */
453 static void clear_block_mark_loop_link(ir_node *block, void *env)
454 {
455         (void) env;
456
457         if (is_Block(block)) {
458                 set_Block_mark(block, 0);
459                 set_loop_link(get_irn_loop(block), NULL);
460         }
461 }
462
463 /**
464  * Returns non-zero if block is part of real loop loop.
465  */
466
467 static unsigned in_loop(ir_node *block, ir_loop *loop)
468 {
469         ir_loop *l     = get_irn_loop(block);
470         ir_loop *outer = get_irg_loop(environ->graph);
471
472         while (l != loop) {
473                 /* loop tree root is not a loop */
474                 if (l == NULL || l == outer)
475                         return 0;
476                 l = get_loop_outer_loop(l);
477         }
478         return 1;
479 }
480
481 /**
482  * Returns the outermost real loop of loop.
483  */
484 static ir_loop *get_loop_outermost(ir_loop *loop)
485 {
486         ir_loop *outer = get_irg_loop(environ->graph);
487         ir_loop *l     = loop;
488         ir_loop *last  = NULL;
489
490         while(l != outer) {
491                 last = l;
492                 l = get_loop_outer_loop(l);
493         }
494         return last;
495 }
496
497 /**
498  * Topologic bottom-up walker sets links of infinite loops to non-zero.
499  * Block marks are used to flag blocks reachable (from end) on the one hand,
500  * on the other hand they are set if the block is not part of an infinite loop.
501  */
502 static void infinite_loop_walker(ir_node *block, void *env)
503 {
504         int arity;
505         int i;
506         (void) env;
507
508         if (! is_Block(block))
509                 return;
510
511         /* start block has no predecessors */
512         if (block == environ->start_block)
513                 return;
514
515         arity = get_irn_arity(block);
516
517         /* block not part of a real loop: no infinite loop */
518         if (get_irn_loop(block) == get_irg_loop(environ->graph))
519                 set_Block_mark(block, 1);
520
521         if (get_Block_mark(block)) {
522                 /* reachable block: mark all cf predecessors */
523                 for (i = 0; i < arity; ++i) {
524                         ir_node *pred = get_Block_cfgpred_block(block, i);
525                         if (is_Bad(pred))
526                                 continue;
527                         set_Block_mark(pred, 1);
528                 }
529         } else {
530                 /* We are in a real loop and see an unreachable block. */
531                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
532
533                 /* flag loop as infinite */
534                 set_loop_link(outermost_loop, outermost_loop);
535                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
536
537                 /* The cf predecessors are unreachable, but can never be part of
538                    an infinite loop, because we just reached them. So we set the
539                    blockmark to prevent triggering the infinite loop detection. */
540
541                 /* passing information to the cf predecessors */
542                 for (i = 0; i < arity; ++i) {
543                         ir_node *pred = get_Block_cfgpred_block(block, i);
544
545                         if (is_Bad(pred))
546                                 continue;
547
548                         /* If our cf predecessor is in the same endless loop,
549                            it is also unreachable. */
550                         if (in_loop(pred, outermost_loop)) {
551                                 set_Block_mark(pred, 0);
552                         } else {
553                                 /* When we leave the unreachable loop, we artificially
554                                    declare the cf predecessor reachable. */
555                                 set_Block_mark(pred, 1);
556                         }
557                 }
558         }
559 }
560
561 /**
562  * Sets loop links of outermost infinite loops to non-zero.
563  */
564 static void analyse_loops(ir_graph *irg)
565 {
566         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
567
568         /* reset block mark and loop links */
569         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
570
571         /* mark end block reachable */
572         set_Block_mark(get_irg_end_block(irg), 1);
573         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
574
575         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
576 }
577
578 #if NO_INF_LOOPS || NO_INF_LOOPS2
579 /**
580  * Returns non-zero if block is part of an infinite loop.
581  */
582 static unsigned is_in_infinite_loop(ir_node *block)
583 {
584         ir_loop *loop;
585
586         assert(is_Block(block));
587         loop = get_irn_loop(block);
588         assert(loop);
589
590         loop = get_loop_outermost(loop);
591         if (loop)
592                 return (get_loop_link(loop) != NULL);
593         else
594                 return 0;
595 }
596 #endif
597
598 /* --------------------------------------------------------
599  * GVN-PRE Exp_gen
600  * --------------------------------------------------------
601  */
602
603 /**
604  * Returns non-zero if a node is movable and a possible candidate for PRE.
605  */
606 static unsigned is_nice_value(ir_node *n)
607 {
608         ir_mode *mode = get_irn_mode(n);
609
610         if (is_Phi(n))
611                 return 1;
612
613 #if LOADS || DIVMODS
614         if (is_Proj(n) && mode != mode_X && mode != mode_T)
615                 return 1;
616 #else
617         if (is_Proj(n))
618                 return 0;
619 #endif
620
621 #if LOADS
622         if (is_Load(n))
623                 return get_Load_volatility(n) == volatility_non_volatile;
624 #endif
625
626         if (get_irn_pinned(n) == op_pin_state_pinned)
627                 return 0;
628
629         if (! mode_is_data(mode)) {
630                 if (! is_Div(n) && ! is_Mod(n))
631                         return 0;
632         }
633         return 1;
634 }
635
636 /**
637  * Checks if a node n is clean in block block for exp_gen.
638  *
639  * @param n      the node
640  * @param block  the block
641  * @return non-zero value for clean node
642  */
643 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
644 {
645         int         i, arity;
646
647         if (is_Phi(n))
648                 return 1;
649
650         if (! is_nice_value(n))
651                 return 0;
652
653         /* filter loads from antic_in */
654         if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
655                 return 0;
656
657         arity = get_irn_arity(n);
658         for (i = 0; i < arity; ++i) {
659                 ir_node *pred   = get_irn_n(n, i);
660                 ir_node *value;
661
662                 if (is_Phi(pred))
663                         continue;
664
665                 /* we only handle current block */
666                 if (get_nodes_block(pred) != block)
667                         continue;
668
669                 if (! is_nice_value(pred))
670                         return 0;
671
672                 value = identify(pred);
673                 if (! ir_valueset_lookup(valueset, value))
674                         return 0;
675
676         }
677         return 1;
678 }
679
680 /**
681  * Topological walker puts nodes in top-down topological order into exp_gen set.
682  * Assumed to walk blockwise and nodewise topologically top-down.
683  *
684  * @param irn    the node
685  * @param ctx    the environment
686  */
687 static void topo_walker(ir_node *irn, void *ctx)
688 {
689         ir_node    *block;
690         block_info *info;
691         ir_node    *value;
692         (void) ctx;
693
694         if (is_Block(irn))
695                 return;
696
697         /* GVN step: remember the value. */
698         value = remember(irn);
699
700         /* values that are not in antic_in also dont't need to be in any other set */
701         if (! is_nice_value(irn))
702                 return;
703
704         if (is_irn_constlike(irn))
705                 return;
706
707         block = get_nodes_block(irn);
708         info  = get_block_info(block);
709
710         ir_valueset_insert(info->avail_out, value, irn);
711
712         if (is_clean_in_block(irn, block, info->exp_gen)) {
713                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
714
715                 ir_valueset_insert(info->exp_gen, value, irn);
716         }
717 }
718
719 /* --------------------------------------------------------
720  * GVN-PRE Antic_in
721  * --------------------------------------------------------
722  */
723
724 /**
725  * Gets result of nodes phi translation into block.
726  *
727  * @param node   the node
728  * @param block  the target block
729  *
730  * @return a phi translation of node node into block block or NULL
731  */
732 static ir_node *get_translated(ir_node *block, ir_node *node)
733 {
734         if (is_irn_constlike(node))
735                 return node;
736
737         return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
738 }
739
740 /**
741  * Saves result of phi translation of node into predecessor
742  * at pos of block succ.
743  *
744  * @param node   the node
745  * @param succ   the successor of the translation target block
746  * @param pos    the position of the predecessor block
747  * @param trans  the translation result
748  *
749  */
750 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
751 {
752         if (is_irn_constlike(node))
753                 return;
754         /* insert or replace */
755         ir_nodehashmap_insert(map, node, trans);
756 }
757
758 #if DIVMODS
759 /* Helper function to compare the values of pred and avail_pred. */
760 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
761 {
762         ir_node *avail_value = identify(avail_pred);
763         ir_node *pred_block  = get_Block_cfgpred_block(block, pos);
764         ir_node *trans_pred  = get_translated(pred_block, pred);
765         ir_node *value;
766
767         if (trans_pred == NULL)
768                 trans_pred = pred;
769         value = identify(trans_pred);
770
771         DB((dbg, LEVEL_3, "manual compare %+F  %+F\n", pred, avail_pred));
772
773         return (value == avail_value);
774 }
775
776 /**
777  * Does phi translation for redundant Div/Mod nodes only.
778  * Returns NULL for non-redundant node, which needs to be phi translated.
779  */
780 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
781 {
782         ir_node *mem   = get_memop_mem(divmod);
783         ir_node *trans = get_translated_pred(block, pos, mem);
784
785         if (trans == NULL)
786                 trans = mem;
787
788         /* no partial redundancy if this is a mode_M phi */
789         if (is_Proj(trans)) {
790                 /* The last memory operation in predecessor block */
791                 ir_node *avail_op = get_Proj_pred(trans);
792
793                 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
794                         unsigned left, right;
795
796                         if (is_Div(avail_op)) {
797                                 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
798                                     get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
799
800                                         left  = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
801                                         right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
802
803                                         if (left && right)
804                                                 return avail_op;
805                                 }
806                         } else if (is_Mod(avail_op)) {
807                             if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
808
809                                         left  = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
810                                         right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
811
812                                         if (left && right)
813                                                 return avail_op;
814                                 }
815                         }
816                 }
817         }
818         return NULL;
819 }
820 #endif
821
822 /**
823  * Translates an expression above a Phi.
824  *
825  * @param node        the node
826  * @param block       the block the node is translated into
827  * @param pos         the input number of the destination block
828  *
829  * @return a node representing the translated value
830  */
831 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
832 {
833         int       i;
834         int       arity;
835         ir_node **in;
836         ir_node  *pred_block = get_Block_cfgpred_block(block, pos);
837         ir_node  *nn;
838         unsigned  needed;
839
840         if (is_Phi(node)) {
841                 if (get_nodes_block(node) == block)
842                         return get_Phi_pred(node, pos);
843                 /* this phi does not need translation */
844                 return node;
845         }
846         arity = get_irn_arity(node);
847
848 #if DIVMODS
849         if (is_Div(node) || is_Mod(node)) {
850                 ir_node *avail_op = phi_translate_divmod(node, block, pos);
851                 if (avail_op)
852                         return avail_op;
853         }
854 #endif
855
856         needed = 0;
857         in = ALLOCANZ(ir_node *, arity);
858
859         /* A value has several representatives. The anti leader is chosen to be
860            the main representative. If we access a node as representative of a
861            value we always use the anti leader. The anti leader can be found by
862            antic_in(identify(node)). */
863         for (i = 0; i < arity; ++i) {
864                 ir_node *pred   = get_irn_n(node, i);
865                 ir_node *value  = identify(pred);
866                 /* get leader for pred to lookup its translated value */
867                 ir_node *leader = ir_valueset_lookup(leaderset, value);
868                 ir_node *pred_trans;
869                 ir_node *new_pred;
870
871                 if (! leader)
872                         leader = pred;
873                 /* we cannot find this value in antic_in, because the value
874                    has (possibly) changed! */
875                 pred_trans  = get_translated(pred_block, leader);
876
877                 DB((dbg, LEVEL_3, "trans %+F of %+F is  %+F\n", leader, pred_block, pred_trans));
878                 if (pred_trans == NULL) {
879                         new_pred = pred;
880                 } else {
881                         new_pred = pred_trans;
882
883                         /* loads: in case of memory projection and load,
884                            skip them and use the loads memory */
885                         if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
886                                 ir_node *load = get_Proj_pred(pred_trans);
887                                 /* translated predecessor will be different */
888                                 needed |= 1;
889
890                                 if (is_Load(load)) {
891                                         /* Put new load under the adjacent loads mem
892                                            such that GVN may compare them. */
893                                         new_pred = get_Load_mem(load);
894                                 }
895                         } else {
896                                 /* predecessor value changed, so translation is needed */
897                                 if (identify(new_pred) != identify(pred))
898                                         needed |= 1;
899                         }
900                 }
901
902                 DB((dbg, LEVEL_4, "in %+F\n", new_pred));
903                 in[i] = new_pred;
904         }
905
906         if (! needed)
907                 return node;
908
909         DB((dbg, LEVEL_3, "Translate\n"));
910
911         if (is_Proj(node))
912                 pred_block = get_nodes_block(in[0]);
913
914         /* copy node to represent the new value.
915            We do not translate nodes that do not need translation,
916            so we use the newly created nodes as value representatives only.
917            Their block is not important, because we create new ones during
918            the insert node phase. */
919         nn = new_ir_node(
920                 get_irn_dbg_info(node),
921                 environ->graph,
922                 pred_block,
923                 get_irn_op(node),
924                 get_irn_mode(node),
925                 arity,
926                 in);
927         /* We need the attribute copy here, because the Hash value of a
928            node might depend on it. */
929         copy_node_attr(environ->graph, node, nn);
930         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
931            because it already uses availability. */
932
933         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
934         return nn;
935 }
936
937 /**
938  * Block-walker, computes Antic_in(block).
939  * Builds a value tree out of the graph by translating values
940  * over phi nodes.
941  *
942  * @param block  the block
943  * @param ctx    the walker environment
944  */
945 static void compute_antic(ir_node *block, void *ctx)
946 {
947         pre_env                *env       = (pre_env*)ctx;
948         block_info             *succ_info;
949         block_info             *info;
950         ir_node                *succ;
951         ir_node                *value;
952         ir_node                *expr;
953         size_t                  size;
954         ir_valueset_iterator_t  iter;
955         int                     n_succ;
956
957         /* filter blocks from topological walker */
958         if (! is_Block(block))
959                 return;
960
961         /* the end block has no successor */
962         if (block == env->end_block)
963                 return;
964
965         info = get_block_info(block);
966         /* track changes */
967         size = ir_valueset_size(info->antic_in);
968         n_succ = get_Block_n_cfg_outs(block);
969
970         /* add exp_gen */
971         if (env->first_iter) {
972 #if NO_INF_LOOPS
973                 /* keep antic_in of infinite loops empty */
974                 if (! is_in_infinite_loop(block)) {
975                         foreach_valueset(info->exp_gen, value, expr, iter) {
976                                 ir_valueset_insert(info->antic_in, value, expr);
977                         }
978                 }
979 #else
980                 foreach_valueset(info->exp_gen, value, expr, iter) {
981                         ir_valueset_insert(info->antic_in, value, expr);
982                 }
983 #endif
984         }
985
986         /* successor might have phi nodes */
987         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
988                 succ      = get_Block_cfg_out(block, 0);
989                 int pos   = get_Block_cfgpred_pos(succ, block);
990                 succ_info = get_block_info(succ);
991
992                 /* initialize translated set */
993                 if (env->first_iter) {
994                         info->trans = XMALLOC(ir_nodehashmap_t);
995                         ir_nodehashmap_init(info->trans);
996                 }
997
998                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
999                         ir_node *trans = get_translated(block, expr);
1000                         ir_node *trans_value;
1001                         ir_node *represent;
1002
1003                         if (trans == NULL)
1004                                 trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
1005                         /* create new value if necessary */
1006                         trans_value = identify_or_remember(trans);
1007
1008                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1009
1010                         /* on value change (phi present) we need the translated node
1011                            to represent the new value for possible further translation. */
1012                         if (value != trans_value)
1013                                 represent = trans;
1014                         else
1015                                 represent = expr;
1016
1017                         // test only 1 translation, because more failed, because of uncleanliness of load
1018                         if (is_clean_in_block(expr, block, info->antic_in) && ! is_Load(expr)) {
1019 #if NO_INF_LOOPS2
1020                                 /* TODO to be determined why nearly every spec benchmark fails */
1021                                 if (! is_in_infinite_loop(succ) || ! is_backedge(succ, pos)) {
1022                                         ir_valueset_replace(info->antic_in, trans_value, represent);
1023                                 }
1024 #else
1025                                 ir_valueset_replace(info->antic_in, trans_value, represent);
1026 #endif
1027                         }
1028                         set_translated(info->trans, expr, represent);
1029                 }
1030
1031         } else if (n_succ > 1) {
1032                 int         i;
1033                 ir_node    *common     = NULL;
1034                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1035                 block_info *succ0_info = get_block_info(succ0);
1036
1037                 /* disjoint of antic_ins */
1038                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1039                         /* iterate over remaining successors */
1040                         for (i = 1; i < n_succ; ++i) {
1041                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1042                                 block_info *succ_info = get_block_info(succ);
1043
1044                                 /* value in antic_in? */
1045                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1046                                 if (common == NULL)
1047                                         break;
1048                         }
1049
1050                         if (common && is_clean_in_block(expr, block, info->antic_in))
1051                                 ir_valueset_replace(info->antic_in, value, expr);
1052                 }
1053         }
1054
1055
1056         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1057
1058         if (size != ir_valueset_size(info->antic_in))
1059                 env->changes |= 1;
1060 }
1061
1062 /* --------------------------------------------------------
1063  * Main algorithm Avail_out
1064  * --------------------------------------------------------
1065  */
1066
1067 /**
1068  * Computes Avail_out(block):
1069  *
1070  * Avail_in(block)  = Avail_out(dom(block))
1071  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1072  *
1073  * Precondition:
1074  *  This function must be called in the top-down topological order:
1075  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1076  *
1077  * @param block   the block
1078  * @param ctx     walker context
1079  */
1080 static void compute_avail_top_down(ir_node *block, void *ctx)
1081 {
1082         pre_env    *env   = (pre_env*)ctx;
1083         block_info *info;
1084
1085         if (block == env->end_block)
1086                 return;
1087
1088         info  = get_block_info(block);
1089
1090         /* Add all nodes from the immediate dominator.
1091            This ensures that avail_out contains the leader. */
1092         if (block != env->start_block) {
1093                 ir_node                *dom_block = get_Block_idom(block);
1094                 block_info             *dom_info  = get_block_info(dom_block);
1095                 ir_node                *value;
1096                 ir_node                *expr;
1097                 ir_valueset_iterator_t  iter;
1098
1099                 foreach_valueset(dom_info->avail_out, value, expr, iter)
1100                         /* replace: use the leader from dominator, not local exp_gen */
1101                         ir_valueset_replace(info->avail_out, value, expr);
1102         }
1103
1104         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1105 }
1106
1107 /* --------------------------------------------------------
1108  * Main algorithm redundancy detection
1109  * --------------------------------------------------------
1110  */
1111
1112 /**
1113  * Returns a valid mode if the value of expr is a partially redundant value.
1114  *
1115  * @param block   the block
1116  * @param expr    the expression
1117  *
1118  * @return mode of the expression if it is partially redundant else NULL
1119  */
1120 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1121 {
1122         ir_node *first_avail         = NULL;
1123         int      pos;
1124         int      arity               = get_irn_arity(block);
1125         int      fully_redundant     = 1;
1126         int      partially_redundant = 0;
1127         ir_mode *mode                = NULL;
1128
1129         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1130
1131         /* for each predecessor blocks */
1132         for (pos = 0; pos < arity; ++pos) {
1133                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1134                 block_info *pred_info;
1135                 ir_node    *trans_expr;
1136                 ir_node    *trans_value;
1137                 ir_node    *avail_expr;
1138
1139                 pred_info  = get_block_info(pred_block);
1140                 trans_expr = get_translated(pred_block, expr);
1141                 trans_value = identify(trans_expr);
1142
1143                 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1144                 /* value might be available through a not yet existing constant */
1145                 if (avail_expr == NULL && is_Const(trans_expr)) {
1146                         /* limit range of new constants */
1147                         ir_mode   *cmode = get_irn_mode(trans_expr);
1148                         ir_tarval *upper = new_tarval_from_long(127, cmode);
1149                         ir_tarval *lower = new_tarval_from_long(-127, cmode);
1150                         ir_tarval *c     = get_Const_tarval(trans_expr);
1151
1152                         if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
1153                                 tarval_cmp(c, upper) != ir_relation_greater_equal) {
1154                                 avail_expr = NULL;
1155                         } else {
1156                                 avail_expr = trans_expr;
1157                         }
1158             }
1159
1160                 DB((dbg, LEVEL_3, "avail_expr %+F  trans_expr %+F\n", avail_expr, trans_expr));
1161
1162                 if (avail_expr == NULL) {
1163                         pred_info->avail = trans_expr;
1164                         pred_info->found = 0;
1165                         fully_redundant  = 0;
1166                 } else {
1167                         /* expr is available, use the leader */
1168                         pred_info->avail    = avail_expr;
1169                         pred_info->found    = 1;
1170                         mode                = get_irn_mode(avail_expr);
1171                         partially_redundant = 1;
1172
1173                         if (first_avail == NULL)
1174                                 first_avail = avail_expr;
1175                         else if (first_avail != avail_expr)
1176                                 /* Multiple different expressions are available */
1177                                 fully_redundant = 0;
1178
1179                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1180                 }
1181         }
1182
1183 #if BETTER_GREED
1184         /* value is redundant from last iteration,
1185            but has not been removed from antic_in (is not optimized) */
1186         if (! environ->first_iter && is_redundant(block, expr))
1187                 return mode;
1188 #endif
1189
1190         /* If it is not the same value already existing along every predecessor
1191        and it is defined by some predecessor then it is partially redundant. */
1192         if (! partially_redundant || fully_redundant)
1193                 return NULL;
1194         return mode;
1195 }
1196
1197 /**
1198  * Updates the new_set of a block by adding the new_set of
1199  * the immediate dominating block.
1200  *
1201  * @param  the block
1202  */
1203 static void update_new_set(ir_node *block, ir_node *idom)
1204 {
1205         ir_node                *value;
1206         ir_node                *expr;
1207         ir_valueset_iterator_t  iter;
1208         block_info             *curr_info = get_block_info(block);
1209         block_info             *idom_info = get_block_info(idom);
1210         int                     updated   = 0;
1211
1212         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1213         foreach_valueset(idom_info->new_set, value, expr, iter) {
1214                 /* inherit new_set from immediate dominator */
1215                 ir_valueset_insert(curr_info->new_set, value, expr);
1216                 /* replace in avail_out */
1217                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1218         }
1219 #ifdef DEBUG_libfirm
1220         if (updated)
1221                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1222 #endif
1223 } /* update_new_set */
1224
1225 #if BETTER_GREED
1226 /*
1227  * Returns redundant flag of node irn in block block.
1228  */
1229 static unsigned is_redundant(ir_node *block, ir_node *irn)
1230 {
1231         (void) block;
1232         (void) irn;
1233
1234         /* TODO Needs to use a flag, because antic_done should only be used
1235            if node is finally processed by insert_nodes. */
1236         return 0;
1237 }
1238 #endif
1239
1240 /**
1241  * Checks if hoisting irn is greedy.
1242  * Greedy hoisting means that there are non partially redundant nodes
1243  * hoisted. This happens if a partially redundant node has
1244  * non redundant predecessors.
1245  */
1246 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1247 {
1248         int block_arity = get_irn_arity(block);
1249         int arity = get_irn_arity(irn);
1250         int pos, i;
1251         block_info *info = get_block_info(block);
1252
1253         /* As long as the predecessor values are available in all predecessor blocks,
1254            we can hoist this value. */
1255         for (pos = 0; pos < block_arity; ++pos) {
1256                 ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1257                 block_info *pred_info  = get_block_info(pred_block);
1258
1259                 for (i = 0; i < arity; ++i) {
1260                         ir_node *pred     = get_irn_n(irn, i);
1261                         ir_node *value;
1262                         ir_node *leader;
1263                         ir_node *trans;
1264                         ir_node *trans_val;
1265                         ir_node *avail;
1266
1267                         if (get_loop_depth(get_irn_loop(pred_block)) > get_loop_depth(get_irn_loop(get_nodes_block(irn))))
1268                                 return 1;
1269
1270                         if (is_Phi(pred) && get_nodes_block(pred) == block)
1271                                 continue;
1272
1273                         DB((dbg, LEVEL_3, "pred %+F\n", pred));
1274                         value = identify(pred);
1275                         leader = ir_valueset_lookup(info->antic_in, value);
1276                         if (! leader)
1277                                 leader = pred;
1278                         DB((dbg, LEVEL_3, "lead %+F\n", leader));
1279                         trans   = get_translated(pred_block, leader);
1280                         if (! trans)
1281                                 trans = pred;
1282                         DB((dbg, LEVEL_3, "trans %+F\n", trans));
1283
1284                         if (is_Load(trans))
1285                                 continue;
1286
1287                         trans_val = identify(trans);
1288                         DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1289                         if (is_irn_constlike(trans_val))
1290                                 continue;
1291                         avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1292
1293                         if (! avail)
1294                                 return 1;
1295 #if 0
1296                         /* only optimize if predecessors have been optimized */
1297                         if (ir_valueset_lookup(info->antic_done, value) == NULL)
1298                                 return 1;
1299 #endif
1300
1301 #if 0
1302                         /* Try to reduce cut. Turned out to be not very effective. */
1303                         /* TODO Use out edges. */
1304                         if (get_irn_outs_computed(irn) && get_irn_n_outs(irn) > 1)
1305                                 return 1;
1306 #endif
1307                 }
1308         }
1309         return 0;
1310 }
1311
1312 /**
1313  * Perform insertion of partially redundant values.
1314  * For every block node, do the following:
1315  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1316  * If the block has multiple predecessors,
1317  *     2a. Iterate over the ANTIC expressions for the block to see if
1318  *         any of them are partially redundant.
1319  *     2b. If so, insert them into the necessary predecessors to make
1320  *         the expression fully redundant.
1321  *     2c. Insert a new Phi merging the values of the predecessors.
1322  *     2d. Insert the new Phi, and the new expressions, into the
1323  *         NEW_SETS set.
1324  *
1325  * @param block  the block
1326  * @param ctx    the walker environment
1327  */
1328 static void insert_nodes_walker(ir_node *block, void *ctx)
1329 {
1330         pre_env                *env    = (pre_env*)ctx;
1331         int                     arity  = get_irn_arity(block);
1332         ir_node                *value;
1333         ir_node                *expr;
1334         block_info             *info;
1335         ir_node                *idom;
1336         int                     pos;
1337         ir_valueset_iterator_t  iter;
1338 #if BETTER_GREED
1339         plist_t *stack;
1340 #endif
1341
1342         /* only blocks */
1343         if (! is_Block(block))
1344                 return;
1345
1346         /* ensure that even the start block has a new_set */
1347         info = get_block_info(block);
1348         if (info->new_set)
1349                 ir_valueset_del(info->new_set);
1350         info->new_set = ir_valueset_new(16);
1351
1352         if (block == env->start_block)
1353                 return;
1354
1355         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1356
1357         idom = get_Block_idom(block);
1358         update_new_set(block, idom);
1359
1360         /* process only path joining blocks */
1361         if (arity < 2) {
1362                 return;
1363         }
1364
1365 #if BETTER_GREED
1366         stack = plist_new();
1367         foreach_valueset(info->antic_in, value, expr, iter) {
1368                 /* inverse topologic */
1369                 plist_insert_front(stack, expr);
1370         }
1371 #endif
1372
1373         /* This is the main reason antic_in is preverred over antic_out;
1374            we may iterate over every anticipated value first and not
1375            over the predecessor blocks. */
1376         foreach_valueset(info->antic_in, value, expr, iter) {
1377                 ir_mode  *mode;
1378                 ir_node  *phi;
1379                 ir_node **phi_in;
1380
1381                 /* already done? */
1382                 if (ir_valueset_lookup(info->antic_done, value))
1383                         continue;
1384
1385                 /* filter phi nodes from antic_in */
1386                 if (is_Phi(expr))
1387                         continue;
1388
1389                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1390
1391                 /* A value computed in the dominator is totally redundant.
1392                    Hence we have nothing to insert. */
1393                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1394                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1395                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1396
1397 #if ! PARTLY_ONLY
1398                         ir_valueset_insert(info->antic_done, value, expr);
1399 #endif
1400                         continue;
1401                 }
1402
1403 #if !BETTER_GREED
1404                 if (is_hoisting_greedy(expr, block)) {
1405                         DB((dbg, LEVEL_2, "greedy\n"));
1406                         continue;
1407                 }
1408 #endif
1409
1410                 mode = is_partially_redundant(block, expr, value);
1411                 if (mode == NULL)
1412                         continue;
1413
1414 #if BETTER_GREED
1415                 if (is_hoisting_greedy(expr, block)) {
1416                         DB((dbg, LEVEL_2, "Better greed: greedy\n"));
1417                         continue;
1418                 }
1419 #endif
1420
1421 #if LOADS || DIVMODS
1422                 /* save old mode_M phis to remove keepalive edges later */
1423                 if (is_memop(expr)) {
1424                         ir_node *mem = get_memop_mem(expr);
1425                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1426                                 ir_nodeset_insert(env->keeps, mem);
1427                         }
1428                 }
1429 #endif
1430
1431 #ifdef DEBUG_libfirm
1432                 if (! is_Proj(expr)) {
1433                         if (env->first_iter)
1434                                 inc_stats(gvnpre_stats->first_iter_found);
1435                         inc_stats(gvnpre_stats->partially);
1436                 }
1437                 if (is_Load(expr))
1438                         inc_stats(gvnpre_stats->loads);
1439                 else if (is_Div(expr) || is_Mod(expr))
1440                         inc_stats(gvnpre_stats->divmods);
1441 #endif
1442
1443                 phi_in = XMALLOCN(ir_node *, arity);
1444
1445                 /* for each predecessor block */
1446                 for (pos = 0; pos < arity; ++pos) {
1447                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1448                         block_info *pred_info;
1449
1450                         pred_info = get_block_info(pred_block);
1451
1452                         if (! pred_info->found) {
1453                                 int i;
1454                                 int node_arity = get_irn_arity(expr);
1455                                 ir_node **in = XMALLOCNZ(ir_node *, node_arity);
1456                                 ir_node *trans;
1457                                 ir_node *new_value;
1458                                 ir_node *target_block = pred_block;
1459
1460                                 for (i = 0; i < node_arity; ++i) {
1461                                         ir_node *pred     = get_irn_n(expr, i);
1462                                         ir_node *value    = identify(pred);
1463                                         ir_node *leader;
1464                                         ir_node *trans;
1465                                         ir_node *trans_val;
1466                                         ir_node *avail;
1467
1468                                         DB((dbg, LEVEL_3, "pred %+F\n", pred));
1469                                         value = identify(pred);
1470                                         /* transform knowledge over anti leader into knowledge over leader */
1471                                         /* get leader for pred to lookup its translated value */
1472                                         leader = ir_valueset_lookup(info->antic_in, value);
1473                                         if (! leader)
1474                                                 leader = pred;
1475                                         DB((dbg, LEVEL_3, "lead %+F\n", leader));
1476
1477                                         trans   = get_translated(pred_block, leader);
1478                                         if (!trans)
1479                                                 trans = pred;
1480                                         DB((dbg, LEVEL_3, "trans %+F\n", trans));
1481
1482                                         /* in case of phi, we are done */
1483                                         if (is_Phi(pred) && get_nodes_block(pred) == block) {
1484                                                 in[i] = trans;
1485                                                 continue;
1486                                         }
1487
1488                                         /* handle load predecessor */
1489                                         if (is_Load(trans)) {
1490                                                 in[i] = trans;
1491                                                 continue;
1492                                         }
1493
1494                                         trans_val = identify(trans);
1495                                         DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1496                                         /* constants are always available but not in avail set */
1497                                         if (is_irn_constlike(trans_val)) {
1498                                                 in[i] = pred;
1499                                                 continue;
1500                                         }
1501
1502                                         /* use the leader
1503                                            In case of loads we need to make sure the hoisted
1504                                            loads are found despite their unique value. */
1505                                         avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1506                                         DB((dbg, LEVEL_3, "avail %+F\n", avail));
1507
1508                                         assert(avail && "predecessor has to be available");
1509                                         in[i] = avail;
1510                                 }
1511
1512                                 if (is_Proj(expr))
1513                                         target_block = get_nodes_block(in[0]);
1514
1515                                 /* copy node to represent the new value.
1516                                    We do not translate nodes that do not need translation,
1517                                    so we use the newly created nodes as value representatives only.
1518                                    Their block is not important, because we create new ones during
1519                                    the insert node phase. */
1520                                 trans = new_ir_node(
1521                                         get_irn_dbg_info(expr),
1522                                         environ->graph,
1523                                         target_block,
1524                                         get_irn_op(expr),
1525                                         get_irn_mode(expr),
1526                                         get_irn_arity(expr),
1527                                         in);
1528                                 free(in);
1529                                 /* We need the attribute copy here, because the Hash value of a
1530                                    node might depend on it. */
1531                                 copy_node_attr(environ->graph, expr, trans);
1532                                 new_value = identify_or_remember(trans);
1533
1534                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F(%+F) not available\n", trans, pred_block, expr, value));
1535
1536                                 /* value is now available in target block through trans
1537                                    insert (not replace) because it has not been available */
1538                                 ir_valueset_insert(pred_info->avail_out, value, trans);
1539                                 ir_valueset_insert(pred_info->avail_out, new_value, trans);
1540
1541                                 DB((dbg, LEVEL_4, "SET AVAIL value %+F trans %+F  in %+F\n", value, trans, pred_block));
1542                                 DB((dbg, LEVEL_4, "SET AVAIL newvalue %+F trans %+F  in %+F\n", new_value, trans, pred_block));
1543
1544                                 phi_in[pos] = trans;
1545                         } else {
1546                                 /* value available */
1547                                 phi_in[pos] = pred_info->avail;
1548                         }
1549                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1550                 }
1551
1552                 /* We do not connect tuples as they will be connected automatically
1553                    by the corresponding projections. */
1554                 if (get_irn_mode(expr) != mode_T) {
1555
1556                         phi = new_r_Phi(block, arity, phi_in, mode);
1557                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1558
1559                         /* this value is now available through the new phi */
1560                         ir_valueset_replace(info->avail_out, value, phi);
1561                         ir_valueset_insert(info->new_set, value, phi);
1562                 }
1563                 free(phi_in);
1564
1565                 /* already optimized this value in this block */
1566                 ir_valueset_insert(info->antic_done, value, expr);
1567                 env->changes |= 1;
1568         }
1569
1570 #if BETTER_GREED
1571         /* TODO Unfinished
1572            Better greed first determines which values are redundant
1573            and decides then which to take.
1574            insert_nodes needs to be split up for that. The cycle could be
1575            for each block: flag redundant nodes,
1576            use heuristic to adjust these flags (also consider antic_done),
1577            do insert nodes.
1578            This way we could decide if we hoist a non redundant node,
1579            if all its successors are redundant.
1580            Or we might try to minimize the cut along hoisted nodes and their
1581            non redundant successors.
1582          */
1583         if (env->changes) {
1584                 plist_element_t *it;
1585                 /* iterate in inverse topological order */
1586                 foreach_plist(stack, it) {
1587                         ir_node *irn   = (ir_node *)plist_element_get_value(it);
1588                         ir_node *block = get_nodes_block(irn);
1589                         int      j;
1590                         char     redundant = 1;
1591
1592                         /* does irn only have redundant successors? */
1593
1594                         /* TODO we need to use edges */
1595                         assert(get_irn_outs_computed(irn));
1596
1597                         for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
1598                                 ir_node *succ = get_irn_out(irn, j);
1599
1600                                 /* if succ and irn are in the same block */
1601                                 if (get_nodes_block(succ) == block && is_redundant(block, succ)) {
1602                                         continue;
1603                                 } else {
1604                                         redundant = 0;
1605                                         break;
1606                                 }
1607                         }
1608
1609                         if (redundant)
1610                                 flag_redundant(irn, 1);
1611                 }
1612         }
1613         plist_free(stack);
1614 #endif
1615
1616 }
1617
1618 #if HOIST_HIGH
1619 static void update_new_set_walker(ir_node *block, void *ctx)
1620 {
1621         pre_env *env = (pre_env*)ctx;
1622
1623         if (! is_Block(block))
1624                 return;
1625         if (block == env->start_block)
1626                 return;
1627
1628         update_new_set(block, get_Block_idom(block));
1629 }
1630
1631 /**
1632  * Domtree block walker to insert nodes into the highest
1633  * possible block. .
1634  *
1635  */
1636 static void hoist_high(ir_node *block, void *ctx)
1637 {
1638         pre_env                *env        = (pre_env*)ctx;
1639         block_info             *curr_info;
1640         ir_valueset_iterator_t  iter;
1641         ir_node                *expr;
1642         ir_node                *value;
1643         int                     arity      = get_irn_arity(block);
1644
1645         if (! is_Block(block))
1646                 return;
1647
1648         curr_info = get_block_info(block);
1649
1650         if (curr_info->new_set)
1651                 ir_valueset_del(curr_info->new_set);
1652         curr_info->new_set = ir_valueset_new(16);
1653
1654         if (block == env->start_block)
1655                 return;
1656
1657         if (arity < 2)
1658                 return;
1659
1660         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1661
1662         /* foreach entry optimized by insert node phase */
1663         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1664                 int pos;
1665
1666                 if (is_memop(expr) || is_Proj(expr))
1667                         continue;
1668
1669                 DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
1670
1671                 /* visit hoisted expressions */
1672                 for (pos = 0; pos < arity; ++pos) {
1673                         /* standard target is predecessor block */
1674                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1675                         block_info *pred_info  = get_block_info(target);
1676                         ir_node    *avail;
1677                         ir_node    *new_target;
1678                         ir_node    *trans_expr;
1679                         ir_node    *trans_value;
1680                         ir_node    *dom;
1681                         int         avail_arity;
1682                         int         i;
1683                         unsigned    nest_depth;
1684                         block_info *dom_info;
1685
1686                         /* get phi translated value */
1687                         trans_expr  = get_translated(target, expr);
1688                         trans_value = identify(trans_expr);
1689                         avail       = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1690
1691                         /* get the used expr on this path */
1692
1693                         /* TODO when does this happen? */
1694                         if (avail == NULL)
1695                                 continue;
1696
1697                         avail_arity = get_irn_arity(avail);
1698                         value = identify(avail);
1699
1700                         /* anticipation border */
1701                         new_target  = NULL;
1702                         nest_depth  = get_loop_depth(get_irn_loop(target));
1703                         /* By using block (instead of target) as initial block,
1704                            we only allow hoisting into a common block of
1705                            both predecessor blocks. */
1706                         dom         = block;
1707
1708                         while (dom && dom != environ->start_block) {
1709
1710                                 dom = get_Block_idom(dom);
1711                                 dom_info = get_block_info(dom);
1712                                 DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
1713
1714                                 /* TODO antic_in means hoistable above block,
1715                                    but we need 'hoistable into block'. */
1716
1717                                 /* check if available node ist still anticipated and clean */
1718                                 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1719                                         DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
1720                                         break;
1721                                 }
1722
1723                                 nest_depth = get_loop_depth(get_irn_loop(dom));
1724
1725                                 /* do not hoist into loops */
1726                                 if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
1727                                         DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
1728                                         /* not a suitable location */
1729                                         continue;
1730                                 }
1731
1732                                 /* check if operands die */
1733
1734                                 /* check for uses on current path*/
1735                                 for (i = 0; i < avail_arity; i++) {
1736                                         ir_node   *pred       = get_irn_n(avail, i);
1737                                         ir_node   *pred_value = identify(pred);
1738
1739                                         if (dom == NULL)
1740                                                 break;
1741
1742                                         DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
1743
1744                                         if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1745                                                 DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
1746                                                 dom = NULL;
1747                                                 break;
1748                                         }
1749
1750                                         /* check every successor */
1751                                         foreach_out_edge(pred, edge) {
1752                                                 ir_node *succ = get_edge_src_irn(edge);
1753                                                 DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
1754
1755                                                 /* check only successors on current path to end */
1756                                                 if (block_dominates(dom, get_nodes_block(succ))) {
1757                                                         ir_node *succ_value = identify(succ);
1758
1759                                                         /* Do we have another user than avail?
1760                                                            Then predecessor is not dead after removal of avail. */
1761                                                         if (succ_value != value) {
1762                                                                 DB((dbg, LEVEL_4, "fail: still used in %+F\n", succ));
1763                                                                 dom = NULL;
1764                                                                 break;
1765                                                         }
1766                                                 }
1767                                         }
1768                                 }
1769                                 if (dom)
1770                                         new_target = dom;
1771
1772                                 /* only try direct dominator */
1773                                 break;
1774                         }
1775
1776                         if (new_target) {
1777                                 block_info *target_info = get_block_info(new_target);
1778                                 int         nn_arity    = get_irn_arity(avail);
1779                                 ir_node   **in          = XMALLOCN(ir_node *, nn_arity);
1780                                 ir_node    *nn;
1781                                 int         i;
1782
1783                                 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1784                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1785
1786                                 for (i = 0; i < nn_arity; ++i) {
1787                                         ir_node *pred       = get_irn_n(avail, i);
1788                                         ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
1789                                         assert(avail_pred);
1790                                         in[i] = avail_pred;
1791                                 }
1792                                 nn = new_ir_node(
1793                                         get_irn_dbg_info(avail),
1794                                         environ->graph,
1795                                         new_target,
1796                                         get_irn_op(avail),
1797                                         get_irn_mode(avail),
1798                                         nn_arity,
1799                                         in);
1800                                 free(in);
1801
1802                                 identify_or_remember(nn);
1803                                 /* NOTE: Nodes are inserted into a dominating block and should
1804                                    be available from this point on. Currently we do not push
1805                                    the availability information through during the walk. */
1806                                 ir_valueset_insert(target_info->new_set, value, nn);
1807                         }
1808                 }
1809         }
1810 }
1811 #endif
1812
1813 /* --------------------------------------------------------
1814  * Elimination of fully redundant nodes
1815  * --------------------------------------------------------
1816  */
1817
1818 /**
1819  * Walker which finds redundant nodes using avail_out sets
1820  * and exchanges them for existing ones.
1821  * We cannot change the graph here as this would affect
1822  * the hash values of the nodes.
1823  *
1824  * @param irn  the node
1825  * @param ctx  the walker environment
1826  */
1827 static void eliminate(ir_node *irn, void *ctx)
1828 {
1829         pre_env *env = (pre_env*)ctx;
1830
1831         if (! is_Block(irn)) {
1832                 ir_node    *block = get_nodes_block(irn);
1833                 block_info *info  = get_block_info(block);
1834                 ir_node    *value = identify(irn);
1835
1836                 if (value != NULL) {
1837                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1838
1839 #if ! PARTLY_ONLY
1840                         if (expr && get_irn_idx(expr) <= env->last_idx)
1841                                 return;
1842 #endif
1843                         if (expr != NULL && expr != irn) {
1844                                 elim_pair *p = OALLOC(env->obst, elim_pair);
1845
1846                                 p->old_node = irn;
1847                                 p->new_node = expr;
1848                                 p->next     = env->pairs;
1849                                 if (get_irn_idx(expr) > env->last_idx)
1850                                         p->reason = FS_OPT_GVN_PARTLY;
1851                                 else
1852                                         p->reason = FS_OPT_GVN_FULLY;
1853                                 env->pairs = p;
1854                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1855                         }
1856                 }
1857         }
1858 }  /* eliminate */
1859
1860 /**
1861  * Do all the recorded changes and optimize
1862  * newly created Phi's.
1863  *
1864  * @param pairs  list of elimination pairs
1865  */
1866 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1867 {
1868         elim_pair             *p;
1869         ir_nodeset_iterator_t  iter;
1870         ir_node               *m_phi;
1871         ir_node               *end    = environ->end_node;
1872
1873         for (p = pairs; p != NULL; p = p->next) {
1874                 /* might be already changed */
1875                 p->new_node = skip_Id(p->new_node);
1876
1877                 // TEST
1878                 /* happens with load nodes */
1879                 if (p->new_node == p->old_node)
1880                         continue;
1881
1882                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1883
1884                 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1885                  * which we can optimize here */
1886                 if (is_Phi(p->new_node)) {
1887                         int      i;
1888                         ir_node *res = NULL;
1889
1890                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1891                                 ir_node *pred = get_irn_n(p->new_node, i);
1892
1893                                 if (pred != p->old_node) {
1894                                         if (res) {
1895                                                 res = NULL;
1896                                                 break;
1897                                         }
1898                                         res = pred;
1899                                 }
1900                         }
1901                         if (res) {
1902                                 exchange(p->new_node, res);
1903                                 p->new_node = res;
1904                         }
1905                 }
1906                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1907
1908                 exchange(p->old_node, p->new_node);
1909         }
1910
1911         /* remove keep alive edges of unused mode_M phis */
1912         foreach_ir_nodeset(keeps, m_phi, iter) {
1913                 remove_End_keepalive(end, m_phi);
1914         }
1915 }  /* eliminate_nodes */
1916
1917 /* --------------------------------------------------------
1918  * GVN PRE pass
1919  * --------------------------------------------------------
1920  */
1921
1922 /**
1923  * Gvn_Pre algorithm.
1924  *
1925  * @param irg   the graph
1926  */
1927 static void gvn_pre(ir_graph *irg, pre_env *env)
1928 {
1929         unsigned              antic_iter;
1930         unsigned              insert_iter;
1931
1932         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1933
1934         /* allocate block info */
1935         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1936
1937         ir_nodehashmap_init(&value_map);
1938
1939         /* generate exp_gen */
1940         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1941         dump_all_expgen_sets(env->list);
1942
1943         /* compute the avail_out sets for all blocks */
1944         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1945
1946         /* compute the anticipated value sets for all blocks */
1947         antic_iter      = 0;
1948         env->first_iter = 1;
1949
1950         /* antic_in passes */
1951         do {
1952                 ++antic_iter;
1953                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1954                 env->changes = 0;
1955                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1956                 env->first_iter = 0;
1957                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1958         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1959
1960         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1961
1962         ir_nodeset_init(env->keeps);
1963         insert_iter       = 0;
1964         env->first_iter   = 1;
1965         /* compute redundant expressions */
1966         do {
1967                 ++insert_iter;
1968                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1969                 env->changes = 0;
1970                 /* TODO topologically top down would be better; fewer iterations. */
1971                 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
1972                 env->first_iter = 0;
1973                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1974         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1975         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1976
1977 #if HOIST_HIGH
1978         /* An attempt to reduce lifetime by hoisting already hoisted values
1979            even higher, if their operands die. */
1980         dom_tree_walk_irg(irg, hoist_high, NULL, env);
1981         /* update avail_out for elimination */
1982         dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
1983 #endif
1984
1985         /* TODO deactivate edges to prevent intelligent removal of nodes */
1986         edges_deactivate(environ->graph);
1987
1988         /* last step: eliminate nodes */
1989         irg_walk_graph(irg, NULL, eliminate, env);
1990         eliminate_nodes(env->pairs, env->keeps);
1991
1992         ir_nodeset_destroy(env->keeps);
1993 }
1994
1995 /**
1996  * Gvn_Pre pass for graph irg.
1997  *
1998  * @param irg   the graph
1999  */
2000 void do_gvn_pre(ir_graph *irg)
2001 {
2002         struct obstack        obst;
2003         pre_env               env;
2004         ir_nodeset_t          keeps;
2005         optimization_state_t  state;
2006         block_info           *block_info;
2007
2008         /* bads and unreachables cause too much trouble with dominance
2009            loop info for endless loop detection
2010            no critical edges is GVN-PRE precondition
2011          */
2012         assure_irg_properties(irg,
2013                 IR_GRAPH_PROPERTY_NO_BADS
2014                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
2015                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
2016                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
2017                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
2018                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
2019
2020         /* register a debug mask */
2021         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
2022
2023         save_optimization_state(&state);
2024         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2025
2026         edges_activate(irg);
2027
2028         environ = &env;
2029         DEBUG_ONLY(init_stats();)
2030
2031         /* setup environment */
2032         obstack_init(&obst);
2033         env.graph        = irg;
2034         env.obst         = &obst;
2035         env.list         = NULL;
2036         env.start_block  = get_irg_start_block(irg);
2037         env.end_block    = get_irg_end_block(irg);
2038         env.end_node     = get_irg_end(irg);
2039         env.pairs        = NULL;
2040         env.keeps        = &keeps;
2041         /* last index is no node */
2042         env.last_idx     = get_irg_last_idx(irg) - 1;
2043
2044         /* Detect and set links of infinite loops to non-zero. */
2045         analyse_loops(irg);
2046
2047         /* Switch on GCSE. We need it to correctly compute
2048            the value of a node, which is independent from
2049            its block. */
2050         set_opt_global_cse(1);
2051         /* new_identities */
2052         if (irg->value_table != NULL)
2053                 del_pset(irg->value_table);
2054         /* initially assumed nodes in pset are 512 */
2055         irg->value_table = new_pset(compare_gvn_identities, 512);
2056
2057         /* do GVN-PRE pass */
2058         gvn_pre(irg, &env);
2059         DEBUG_ONLY(print_stats();)
2060
2061         /* clean up: delete all sets */
2062         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2063                 free_block_info(block_info);
2064         }
2065
2066         DEBUG_ONLY(free_stats();)
2067         ir_nodehashmap_destroy(&value_map);
2068         obstack_free(&obst, NULL);
2069         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2070
2071         /* Pin the graph again.
2072            This is needed due to the use of set_opt_global_cse(1) */
2073         set_irg_pinned(irg, op_pin_state_pinned);
2074         restore_optimization_state(&state);
2075         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
2076
2077         /* TODO there seem to be optimizations that try to use the existing
2078            value_table. */
2079         new_identities(irg);
2080 }
2081
2082 /* Creates an ir_graph pass for do_gvn_pre. */
2083 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2084 {
2085         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
2086 }