[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Global Value Numbering Partial Redundancy Elimination
23  *          (VanDrunen Hosking 2004)
24  * @author  Michael Beck
25  */
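/*
 * Illustrative sketch (not part of the original sources): GVN-PRE removes
 * computations that are redundant only along some paths by making them
 * fully redundant and merging the results with a Phi.  Roughly:
 *
 *     if (c)                               if (c)
 *         x = a + b;                           x = a + b;
 *     ...                        ==>       else
 *     y = a + b;   // partially                t = a + b;   // inserted copy
 *                  // redundant            y = Phi(x, t);   // now fully redundant
 *
 * Because the analysis works on global value numbers rather than on
 * syntactic expressions, equal values that are computed by different
 * expressions on each path can be recognized as the same value.
 */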
27 #include "config.h"
28
29 #include "debug.h"
30 #include "ircons.h"
31 #include "irdom.h"
32 #include "iredges.h"
33 #include "irflag.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
41 #include "irouts.h"
42 #include "irpass.h"
43 #include "valueset.h"
44 #include "irloop.h"
45
46 #include "irgraph_t.h"
47 #include "irnode_t.h"
48 #include "iropt_t.h"
49 #include "plist.h"
50
51 #define MAX_ANTIC_ITER 10
52 #define MAX_INSERT_ITER 3
53
54 #define HOIST_HIGH 1
55 #define BETTER_GREED 0
56 #define LOADS 1
57 #define DIVMODS 1
58 #define NO_INF_LOOPS 0
59
60 /** Additional info we need for every block. */
61 typedef struct block_info {
62         ir_valueset_t     *exp_gen;    /* contains this block's clean expressions */
63         ir_valueset_t     *avail_out;  /* available values at block end */
64         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
65         ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert_nodes() */
66         ir_valueset_t     *new_set;    /* values newly made available by hoisting */
67         ir_nodehashmap_t  *trans;      /* nodes translated into this block */
68         ir_node           *avail;      /* saves available node for insert_nodes */
69         int                found;      /* saves kind of availability for insert_nodes */
70         ir_node           *block;      /* block of the block_info */
71         struct block_info *next;       /* links all instances for easy access */
72 } block_info;
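/*
 * Rough sketch of how these per-block sets are used (simplified from the
 * VanDrunen/Hosking scheme; the precise handling of Phis and of translated
 * nodes lives in compute_antic() and compute_avail_top_down() below):
 *
 *   avail_out(b) = avail_out(idom(b))  u  values computed in b
 *   antic_in(b)  = clean expressions of exp_gen(b)  u  antic_in of the
 *                  successors (phi translated, intersected over all of them)
 *
 * insert_nodes() then searches join blocks for values that are anticipated
 * there but available in only some of the predecessors.
 */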
73
74 /**
75  * A pair of nodes to be exchanged.
76  * We have to defer the exchange because our hash-sets cannot
77  * find an already replaced node.
78  */
79 typedef struct elim_pair {
80         ir_node *old_node;      /* node that will be replaced */
81         ir_node *new_node;      /* replacement for old_node */
82         struct elim_pair *next; /* links all instances for easy access */
83         int     reason;         /* reason for the replacement */
84 } elim_pair;
85
86 /** environment for the GVN-PRE algorithm */
87 typedef struct pre_env {
88         ir_graph       *graph;        /* current graph */
89         struct obstack *obst;         /* obstack to allocate on */
90         ir_node        *start_block;  /* start block of the current graph */
91         ir_node        *end_block;    /* end block of the current graph */
92         ir_node        *end_node;     /* end node of the current graph */
93         block_info     *list;         /* block_info list head */
94         elim_pair      *pairs;        /* elim_pair list head */
95         ir_nodeset_t   *keeps;        /* set of phis to be removed, to kill their keep-alive edges */
96         unsigned        last_idx;     /* last node index of input graph */
97         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
98         char            first_iter;   /* non-zero for first fixed point iteration */
99         char            insert_phase; /* non-zero during the insertion phase */
100 } pre_env;
101
102 static pre_env *environ;
103 /* custom GVN value map */
104 static ir_nodehashmap_t value_map;
105
106 /* debug module handle */
107 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
108
109 #ifdef DEBUG_libfirm
110
111 /* --------------------------------------------------------
112  * Statistics
113  * --------------------------------------------------------
114  */
115
116 typedef struct gvnpre_statistics {
117         int replaced;
118         int partially;
119         int fully;
120         int loads;
121         int divmods;
122         int hoist_high;
123         int first_iter_found;
124         int antic_iterations;
125         int insert_iterations;
126         int infinite_loops;
127 } gvnpre_statistics;
128
129 static gvnpre_statistics *gvnpre_stats = NULL;
130
131 static void init_stats(void)
132 {
133         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
134 }
135
136 static void free_stats(void)
137 {
138         free(gvnpre_stats);
139         gvnpre_stats = NULL;
140 }
141
142 static void print_stats(void)
143 {
144         gvnpre_statistics *stats = gvnpre_stats;
145         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
146         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
147         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
148         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
149         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
150         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
151         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
152         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
153         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
154         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
155 }
156
157 #define set_stats(var, value) (var)=(value)
158 #define inc_stats(var)        ((var)+=1)
159
160 /* --------------------------------------------------------
161  * Dump valuesets
162  * --------------------------------------------------------
163  */
164
165 /**
166  * Dump a value set.
167  *
168  * @param set    the set to dump
169  * @param txt    a text to describe the set
170  * @param block  the owner block of the set
171  */
172 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
173 {
174         ir_valueset_iterator_t iter;
175         ir_node *value, *expr;
176         int i;
177
178         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
179         i = 0;
180         foreach_valueset(set, value, expr, iter) {
181                 if ((i & 3) == 3)
182                         DB((dbg, LEVEL_2, "\n"));
183                 if (value != expr)
184                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
185                 else
186                         DB((dbg, LEVEL_2, " %+F,", expr));
187                 ++i;
188         }
189         DB((dbg, LEVEL_2, "\n}\n"));
190 }  /* dump_value_set */
191
192 /**
193  * Dump all exp_gen value sets.
194  *
195  * @param list  the list of block infos to retrieve the sets from
196  */
197 static void dump_all_expgen_sets(block_info *list)
198 {
199         block_info *block_info;
200
201         for (block_info = list; block_info != NULL; block_info = block_info->next) {
202                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
203         }
204 }
205
206 #else
207
208 #define dump_all_expgen_sets(list)
209 #define dump_value_set(set, txt, block)
210
211 #endif /* DEBUG_libfirm */
212
213 /* --------------------------------------------------------
214  * GVN Functions
215  * --------------------------------------------------------
216  */
217
218 /**
219  * Compares node collisions in valuetable.
220  * Modified identities_cmp().
221  */
222 static int compare_gvn_identities(const void *elt, const void *key)
223 {
224         ir_node *a = (ir_node *)elt;
225         ir_node *b = (ir_node *)key;
226         int i, irn_arity_a;
227
228         if (a == b) return 0;
229
230         /* phi nodes kill predecessor values and are always different */
231         if (is_Phi(a) || is_Phi(b))
232                 return 1;
233
234         if ((get_irn_op(a) != get_irn_op(b)) ||
235             (get_irn_mode(a) != get_irn_mode(b))) return 1;
236
237         /* compare if a's in and b's in are of equal length */
238         irn_arity_a = get_irn_arity(a);
239         if (irn_arity_a != get_irn_arity(b))
240                 return 1;
241
242         /* blocks are never the same */
243         if (is_Block(a) || is_Block(b))
244                 return 1;
245
246         /* TODO depends on load optimization */
247         if (get_irn_pinned(a) == op_pin_state_pinned) {
248                 /* for pinned nodes, the block inputs must be equal */
249                 if (get_irn_n(a, -1) != get_irn_n(b, -1))
250                         return 1;
251         } else {
252                 /* we need global values independently from their blocks */
253                 assert(get_opt_global_cse());
254         }
255
256         /* compare a->in[0..ins] with b->in[0..ins] */
257         for (i = 0; i < irn_arity_a; ++i) {
258                 ir_node *pred_a = get_irn_n(a, i);
259                 ir_node *pred_b = get_irn_n(b, i);
260                 if (pred_a != pred_b) {
261                         /* if both predecessors are CSE neutral they might be different */
262                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
263                                 return 1;
264                 }
265         }
266
267         /*
268          * here, we already know that the nodes are identical except for their
269          * attributes
270          */
271         if (a->op->ops.node_cmp_attr)
272                 return a->op->ops.node_cmp_attr(a, b);
273
274         return 0;
275 }
276
277 /**
278  * identify() does a lookup in the GVN value table.
279  * To be used when no new GVN values are to be created.
280  *
281  * @param irn  a node representing an expression
282  * @return a node representing the value
283  */
284 static ir_node *identify(ir_node *irn)
285 {
286         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
287         if (value)
288                 return value;
289         /* irn represents a new value */
290         return identify_remember(irn);
291 }
292
293 /**
294  * remember() adds node irn to the GVN value table.
295  * identify_remember() only identifies values of nodes with the
296  * same predecessor nodes (not values). By creating a node from the predecessor
297  * values, a true valuetree is built. Phis kill their predecessor value,
298  * so no circular dependencies need to be resolved.
299  *
300  * TODO Improvement:
301  *      Maybe this could be implemented with a custom node hash that takes
302  *      phi nodes and true values (instead of predecessors) into account,
303  *      resulting in value numbers.
304  *
305  * @param irn  a node representing an expression
306  * @return     the value of the expression
307  */
308 static ir_node *remember(ir_node *irn)
309 {
310         int       arity   = get_irn_arity(irn);
311         int       i;
312         int       changed = 0;
313         ir_node **in      = XMALLOCN(ir_node *, arity);
314         ir_node  *value;
315
316         DB((dbg, LEVEL_4, "Remember %+F\n", irn));
317
318         for (i = 0; i < arity; ++i) {
319                 ir_node *pred       = get_irn_n(irn, i);
320                 ir_node *pred_value = identify(pred);
321
322                 /* phi will be translated anyway, so kill the predecessor values
323                    also prevents circular dependencies */
324                 if (is_Phi(pred)) {
325                         /* every phi represents its own value */
326                         in[i] = pred;
327                         continue;
328                 }
329
330                 /* predecessor is not its value representation */
331                 if (pred != pred_value)
332                         changed = 1;
333                 in[i] = pred_value;
334         }
335
336         if (changed) {
337                 /* create a representative for the value */
338                 ir_node *nn = new_ir_node(
339                         get_irn_dbg_info(irn),
340                         get_irn_irg(irn),
341                         get_nodes_block(irn),
342                         get_irn_op(irn),
343                         get_irn_mode(irn),
344                         get_irn_arity(irn),
345                         in);
346                 copy_node_attr(environ->graph, irn, nn);
347
348                 /* now the value can be determined */
349                 value = identify_remember(nn);
350         } else {
351                 value = identify_remember(irn);
352         }
353         free(in);
354
355         ir_nodehashmap_insert(&value_map, irn, value);
356
357         return value;
358 }
359
360 /** When the value map has been built we may look up expressions
361  *  and remember them if new.
362  */
363 static ir_node *identify_or_remember(ir_node *irn)
364 {
365         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
366         if (value)
367                 return value;
368         else
369                 return remember(irn);
370 }
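/*
 * Illustrative example (hypothetical nodes): assume x = Add(a, b) in one
 * block and y = Add(a', b) in another, where a' is a different expression
 * with the same value as a.  remember() rebuilds both Adds over the value
 * representatives of their operands, so identify_remember() maps x and y
 * to one common value node.  Phis are deliberately kept as their own
 * values, which keeps the value tree free of cycles.
 */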
371
372 /* --------------------------------------------------------
373  * Block info
374  * --------------------------------------------------------
375  */
376
377 /**
378  * Allocate block info for block block.
379  *
380  * @param block   the block
381  * @param env     the environment
382  */
383 static void alloc_block_info(ir_node *block, pre_env *env)
384 {
385         block_info *info = OALLOC(env->obst, block_info);
386
387         set_irn_link(block, info);
388         info->exp_gen    = ir_valueset_new(16);
389         info->avail_out  = ir_valueset_new(16);
390         info->antic_in   = ir_valueset_new(16);
391         info->antic_done = ir_valueset_new(16);
392         info->trans = XMALLOC(ir_nodehashmap_t);
393         ir_nodehashmap_init(info->trans);
394
395         info->new_set = NULL;
396         info->avail   = NULL;
397         info->block   = block;
398         info->found   = 1;
399
400         info->next = env->list;
401         env->list  = info;
402 }  /* alloc_block_info */
403
404 static void free_block_info(block_info *block_info)
405 {
406         ir_valueset_del(block_info->exp_gen);
407         ir_valueset_del(block_info->avail_out);
408         ir_valueset_del(block_info->antic_in);
            ir_valueset_del(block_info->antic_done);
409         if (block_info->trans) {
410                 ir_nodehashmap_destroy(block_info->trans);
411                 free(block_info->trans);
412         }
413         if (block_info->new_set)
414                 ir_valueset_del(block_info->new_set);
415 }
416
417 /**
418  * Bottom up walker that ensures that every block gets a block info.
419  *
420  * @param irn   the node
421  * @param ctx   the environment
422  */
423 static void block_info_walker(ir_node *irn, void *ctx)
424 {
425         if (is_Block(irn)) {
426                 pre_env *env = (pre_env*)ctx;
427                 alloc_block_info(irn, env);
428         }
429 }
430
431 /**
432  * Returns the block info of a block.
433  */
434 static block_info *get_block_info(ir_node *block)
435 {
436         return (block_info*)get_irn_link(block);
437 }
438
439 /* --------------------------------------------------------
440  * Infinite loop analysis
441  * --------------------------------------------------------
442  */
443
444 /**
445  * Walker to set block marks and loop links to 0.
446  */
447 static void clear_block_mark_loop_link(ir_node *block, void *env)
448 {
449         (void) env;
450
451         if (is_Block(block)) {
452                 set_Block_mark(block, 0);
453                 set_loop_link(get_irn_loop(block), NULL);
454         }
455 }
456
457 /**
458  * Returns non-zero if block is part of the real loop loop.
459  */
460
461 static unsigned in_loop(ir_node *block, ir_loop *loop)
462 {
463         ir_loop *l     = get_irn_loop(block);
464         ir_loop *outer = get_irg_loop(environ->graph);
465
466         while (l != loop) {
467                 /* loop tree root is not a loop */
468                 if (l == NULL || l == outer)
469                         return 0;
470                 l = get_loop_outer_loop(l);
471         }
472         return 1;
473 }
474
475 /**
476  * Returns the outermost real loop of loop.
477  */
478 static ir_loop *get_loop_outermost(ir_loop *loop)
479 {
480         ir_loop *outer = get_irg_loop(environ->graph);
481         ir_loop *l     = loop;
482         ir_loop *last  = NULL;
483
484         while (l != outer) {
485                 last = l;
486                 l = get_loop_outer_loop(l);
487         }
488         return last;
489 }
490
491 /**
492  * Topological bottom-up walker that sets the loop links of infinite loops
493  * to non-zero.  Block marks are used to flag blocks reachable from the end
494  * and are also set if a block is not part of an infinite loop.
495  */
496 static void infinite_loop_walker(ir_node *block, void *env)
497 {
498         int arity;
499         int i;
500         (void) env;
501
502         if (! is_Block(block))
503                 return;
504
505         /* start block has no predecessors */
506         if (block == environ->start_block)
507                 return;
508
509         arity = get_irn_arity(block);
510
511         /* block not part of a real loop: no infinite loop */
512         if (get_irn_loop(block) == get_irg_loop(environ->graph))
513                 set_Block_mark(block, 1);
514
515         if (get_Block_mark(block)) {
516                 /* reachable block: mark all cf predecessors */
517                 for (i = 0; i < arity; ++i) {
518                         ir_node *pred = get_Block_cfgpred_block(block, i);
519                         if (is_Bad(pred))
520                                 continue;
521                         set_Block_mark(pred, 1);
522                 }
523         } else {
524                 /* We are in a real loop and see an unreachable block. */
525                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
526
527                 /* flag loop as infinite */
528                 set_loop_link(outermost_loop, outermost_loop);
529                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
530
531                 /* The cf predecessors are unreachable, but can never be part of
532                    an infinite loop, because we just reached them. So we set the
533                    blockmark to prevent triggering the infinite loop detection. */
534
535                 /* passing information to the cf predecessors */
536                 for (i = 0; i < arity; ++i) {
537                         ir_node *pred = get_Block_cfgpred_block(block, i);
538
539                         if (is_Bad(pred))
540                                 continue;
541
542                         /* If our cf predecessor is in the same endless loop,
543                            it is also unreachable. */
544                         if (in_loop(pred, outermost_loop)) {
545                                 set_Block_mark(pred, 0);
546                         } else {
547                                 /* When we leave the unreachable loop, we artificially
548                                    declare the cf predecessor reachable. */
549                                 set_Block_mark(pred, 1);
550                         }
551                 }
552         }
553 }
554
555 /**
556  * Sets loop links of outermost infinite loops to non-zero.
557  */
558 static void analyse_loops(ir_graph *irg)
559 {
560         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
561
562         /* reset block mark and loop links */
563         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
564
565         /* mark end block reachable */
566         set_Block_mark(get_irg_end_block(irg), 1);
567         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
568
569         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
570 }
571
572 #if NO_INF_LOOPS
573 /**
574  * Returns non-zero if block is part of an infinite loop.
575  */
576 static unsigned is_in_infinite_loop(ir_node *block)
577 {
578         ir_loop *loop;
579
580         assert(is_Block(block));
581         loop = get_irn_loop(block);
582         assert(loop);
583
584         loop = get_loop_outermost(loop);
585         if (loop)
586                 return (get_loop_link(loop) != NULL);
587         else
588                 return 0;
589 }
590 #endif
591
592 /* --------------------------------------------------------
593  * GVN-PRE Exp_gen
594  * --------------------------------------------------------
595  */
596
597 /**
598  * Returns non-zero if a node is movable and a possible candidate for PRE.
599  */
600 static unsigned is_nice_value(ir_node *n)
601 {
602         ir_mode *mode = get_irn_mode(n);
603
604         /* positive group */
605
606         if (is_Phi(n))
607                 return 1;
608
609 #if LOADS || DIVMODS
610         if (is_Proj(n))
611                 return 1;
612 #endif
613
614 #if LOADS
615         if (is_Load(n))
616                 return (get_Load_volatility(n) == volatility_non_volatile);
617 #endif
618
619         /* negative group */
620
621         if (get_irn_pinned(n) == op_pin_state_pinned)
622                 return 0;
623
624         if (! mode_is_data(mode)) {
625                 if (! is_Div(n) && ! is_Mod(n))
626                         return 0;
627         }
628         return 1;
629 }
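/*
 * Examples under the default configuration above: Phis, Projs, non-volatile
 * Loads, Divs/Mods and floating data computations such as Add or Conv are
 * candidates; pinned operations such as Calls or Stores are never moved.
 */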
630
631 /**
632  * Checks if a node n is clean in block block for exp_gen.
633  *
634  * @param n      the node
635  * @param block  the block
636  * @return non-zero value for clean node
637  */
638 static unsigned is_clean_in_block_expgen(ir_node *n, ir_node *block)
639 {
640         int i, arity;
641
642         if (is_Phi(n))
643                 return 1;
644
645         if (! is_nice_value(n))
646                 return 0;
647
648         arity = get_irn_arity(n);
649         for (i = 0; i < arity; ++i) {
650                 ir_node *pred = get_irn_n(n, i);
651
652                 /* sufficient for exp_gen because block is always node's block */
653                 if (get_nodes_block(pred) != block)
654                         continue;
655
656                 /* predecessor is in block,
657                    so it needs to be clean */
658                 if (get_irn_link(pred)) {
659                         continue;
660                 } else {
661                         DB((dbg, LEVEL_3, "unclean %+F because pred %+F unclean\n", n, pred));
662                         return 0;
663                 }
664         }
665         return 1;
666 }
667
668 /**
669  * Topological walker puts nodes in top-down topological order into exp_gen set.
670  * Assumed to walk blockwise and nodewise topologically top-down.
671  *
672  * @param irn    the node
673  * @param ctx    the environment
674  */
675 static void topo_walker(ir_node *irn, void *ctx)
676 {
677         ir_node    *block;
678         block_info *info;
679         ir_node    *value;
680         (void) ctx;
681
682         if (is_Block(irn))
683                 return;
684
685         /* GVN step: remember the value. Predecessors need to be visited. */
686         value = remember(irn);
687
688         block = get_nodes_block(irn);
689         info  = get_block_info(block);
690
691         /* values for avail_out do not need to be clean */
692         ir_valueset_insert(info->avail_out, value, irn);
693
694         /* no need to put constants into the sets: they are always redundant */
695         if (! is_nice_value(irn) || is_irn_constlike(irn))
696                 return;
697
698         if (is_clean_in_block_expgen(irn, block)) {
699                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
700
701                 ir_valueset_insert(info->exp_gen, value, irn);
702                 /* flag irn as clean*/
703                 set_irn_link(irn, irn);
704         } else {
705                 /* flag irn as not clean */
706                 set_irn_link(irn, NULL);
707         }
708 }
709
710 /* --------------------------------------------------------
711  * GVN-PRE Antic_in
712  * --------------------------------------------------------
713  */
714
715 /**
716  * Gets the result of a node's phi translation into block.
717  *
718  * @param node   the node
719  * @param block  the target block
720  *
721  * @return a phi translation of node node into block block or NULL
722  */
723 static ir_node *get_translated(ir_node *block, ir_node *node)
724 {
725         if (is_irn_constlike(node))
726                 return node;
727
728         return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
729 }
730
731 static ir_node *get_translated_pred(ir_node *block, int pos, ir_node *node)
732 {
733         ir_node *pred_block;
734         if (is_irn_constlike(node))
735                 return node;
736
737         pred_block = get_Block_cfgpred_block(block, pos);
738         return ir_nodehashmap_get(ir_node, get_block_info(pred_block)->trans, node);
739 }
740
741 /**
742  * Saves the result of the phi translation of a node into the given
743  * translation map.
744  *
745  * @param map    the translation map of the block the node is translated into
746  * @param node   the node
747  * @param trans  the translation result
748  */
751 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
752 {
753         if (is_irn_constlike(node))
754                 return;
755         /* insert or replace */
756         ir_nodehashmap_insert(map, node, trans);
757 }
758
759 /**
760  * Checks if node is hoistable above block.
761  *
762  * A clean node in block block can be hoisted above block succ.
763  * A node is not clean if its representative is killed in block block;
764  * such a node can still be hoisted into block block.
765  *
766  * @param node        the node to be checked
767  * @param block       the block to be hoisted above
768  * @param translated  non-zero if node has already been phi translated
769  */
770 static unsigned is_hoistable_above(ir_node *node, ir_node *block, unsigned translated)
771 {
772         int i;
773         int arity = get_irn_arity(node);
774
775         /* make sure that node can be hoisted above block */
776
777         if (is_irn_constlike(node))
778                 return 1;
779
780         for (i = 0; i < arity; ++i) {
781                 block_info *info       = get_block_info(block);
782                 ir_node    *pred       = get_irn_n(node, i);
783                 ir_node    *pred_value = identify(pred);
784                 ir_node    *pred_block = get_nodes_block(pred);
785
786                 /* predecessor strictly dominating */
787                 if (block_strictly_dominates(pred_block, block))
788                         continue;
789
790                 /* if we didn't translate the exact representative we cannot translate */
791                 if (translated && !get_translated(pred_block, pred))
792                         return 0;
793
794                 if (! ir_valueset_lookup(info->antic_in, pred_value))
795                         return 0;
796         }
797         return 1;
798 }
799
800 #if LOADS || DIVMODS
801 /* Helper function to compare the values of pred and avail_pred. */
802 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
803 {
804         ir_node *avail_value  = identify(avail_pred);
805         ir_node *trans_pred   = get_translated_pred(block, pos, pred);
806         ir_node *value;
807
808         if (trans_pred == NULL)
809                 trans_pred = pred;
810         value = identify(trans_pred);
811
812         DB((dbg, LEVEL_3, "manual compare %+F  %+F\n", pred, avail_pred));
813
814         return (value == avail_value);
815 }
816 #endif
817
818 #if LOADS
819 /**
820  * Does phi translation for redundant load nodes only.
821  * Returns NULL for non-redundant loads, which need to be phi translated.
822  * Loads are compared by comparing their pointer values,
823  * and ensuring that they are adjacent.
824  * This is equivalent to what phi_translation does,
825  * when a new node is created and then GCSE optimized resulting
826  * in an already available node.
827  */
828 static ir_node *phi_translate_load(ir_node *load, ir_node *block, int pos)
829 {
830         ir_node *mem   = get_Load_mem(load);
831         ir_node *trans = get_translated_pred(block, pos, mem);
832
833         if (trans == NULL)
834                 trans = mem;
835
836         /* no partial redundancy if this is a mode_M phi */
837         if (is_Proj(trans)) {
838                 /* The last memory operation in predecessor block */
839                 ir_node *avail_load = get_Proj_pred(trans);
840
841                 /* memop is a load with matching type */
842                 if (is_Load(avail_load) &&
843                             get_Load_mode(load) == get_Load_mode(avail_load)) {
844
845                         unsigned match = match_pred(get_Load_ptr(load), get_Load_ptr(avail_load), block, pos);
846
847                         if (match)
848                                 return avail_load;
849                 }
850         }
851         return NULL;
852 }
853 #endif
854
855 #if DIVMODS
856 /**
857  * Does phi translation for redundant Div/Mod nodes only.
858  * Returns NULL for non-redundant nodes, which need to be phi translated.
859  */
860 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
861 {
862         ir_node *mem   = get_memop_mem(divmod);
863         ir_node *trans = get_translated_pred(block, pos, mem);
864
865         if (trans == NULL)
866                 trans = mem;
867
868         /* no partial redundancy if this is a mode_M phi */
869         if (is_Proj(trans)) {
870                 /* The last memory operation in predecessor block */
871                 ir_node *avail_op = get_Proj_pred(trans);
872
873                 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
874                         unsigned left, right;
875
876                         if (is_Div(avail_op)) {
877                                 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
878                                     get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
879
880                                         left  = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
881                                         right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
882
883                                         if (left && right)
884                                                 return avail_op;
885                                 }
886                         } else if (is_Mod(avail_op)) {
887                             if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
888
889                                         left  = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
890                                         right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
891
892                                         if (left && right)
893                                                 return avail_op;
894                                 }
895                         }
896                 }
897         }
898         return NULL;
899 }
900 #endif
901
902 /**
903  * Translates an expression above a Phi.
904  *
905  * @param node        the node
906  * @param block       the block containing the Phi(s) over which node is translated
907  * @param pos         the predecessor position of block to translate over
908  * @param pred_block  the predecessor block of block at position pos
909  * @return a node representing the translated value
910  */
911 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_node *pred_block)
912 {
913         int       i;
914         int       arity;
915         ir_node **in;
916         ir_node  *nn;
917         int       needed;
918
919         if (is_Phi(node)) {
920                 if (get_nodes_block(node) == block)
921                         return get_Phi_pred(node, pos);
922                 /* this phi does not need translation */
923                 return node;
924         }
925
926         arity = get_irn_arity(node);
927
928 #if LOADS
929         if (is_Load(node)) {
930                 ir_node *avail_load = phi_translate_load(node, block, pos);
931                 if (avail_load)
932                         return avail_load;
933         }
934 #endif
935
936 #if DIVMODS
937         if (is_Div(node) || is_Mod(node)) {
938                 ir_node *avail_op = phi_translate_divmod(node, block, pos);
939                 if (avail_op)
940                         return avail_op;
941         }
942 #endif
943
944         needed = 0;
945         /* insert phase enforces translation for previously not translated nodes */
946         if (environ->insert_phase)
947                 needed = 1;
948
949         in = XMALLOCN(ir_node *, arity);
950
951         for (i = 0; i < arity; ++i) {
952                 ir_node *pred       = get_irn_n(node, i);
953                 /* we cannot find this value in antic_in, because the value
954                    has (possibly) changed! */
955                 ir_node *pred_trans = get_translated(pred_block, pred);
956                 ir_node *expr;
957
958                 if (pred_trans == NULL) {
959                         /* reasons for this are:
960                            1. pred not dominated by block: use predecessor.
961                            2. dangling representative: predecessor not translated.
962                            We cannot phi translate; it would be the wrong value. */
963                         expr = pred;
964                 } else {
965                         expr = pred_trans;
966                         /* predecessor value changed, so translation is needed */
967                         if (identify(expr) != identify(pred))
968                                 needed |= 1;
969                 }
970
971                 /* We need to build a value tree. But values cannot be translated,
972                    expressions can be. So we translate the expressions and let GVN
973                    identify their values. */
974                 in[i] = expr;
975         }
976
977         if (! needed)
978                 return node;
979
980         DB((dbg, LEVEL_3, "Translate\n"));
981         /* copy node to represent the new value.
982            We do not translate nodes that do not need translation,
983            so we use the newly created nodes as value representatives only.
984            Their block is not important, because we create new ones during
985            insert_nodes(). */
986         nn = new_ir_node(
987                 get_irn_dbg_info(node),
988                 environ->graph,
989                 get_Block_cfgpred_block(block, pos),
990                 get_irn_op(node),
991                 get_irn_mode(node),
992                 arity,
993                 in);
994         free(in);
995         /* We need the attribute copy here, because the Hash value of a
996            node might depend on it. */
997         copy_node_attr(environ->graph, node, nn);
998         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
999            because it already uses availability. */
1000
1001         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
1002         return nn;
1003 }
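/*
 * Illustrative example (hypothetical nodes): let block B contain
 * p = Phi(a, b) and let the expression Add(p, c) be anticipated in B.
 * Translating it over predecessor 0 conceptually replaces p by a and
 * yields Add(a, c).  The copy created above only acts as representative
 * of that value; real nodes are placed later by insert_nodes().
 */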
1004
1005 /**
1006  * Block-walker, computes Antic_in(block).
1007  * Builds a value tree out of the graph by translating values
1008  * over phi nodes.
1009  * Although the algorithm works on values, constructing the value tree
1010  * depends on actual representations through nodes and their actual
1011  * predecessors.
1012  * By using only one representative (GVN-PRE) for every value, we have to be
1013  * careful not to break the topological order during translation. If a node
1014  * needs to be translated, but its predecessor - a representative in the same
1015  * antic_in scope - has not been, then it has to be killed.
1016  * one-representative-problem: Two blocks yield the same value through
1017  *    completely different calculations. The value is common, but the
1018  *    representative cannot be phi translated, because the predecessors
1019  *    haven't been.
1020  *    If a value is in exp_gen and also in antic_in of the successor,
1021  *    a similar situation arises.
1022  *
1023  * By using antic_in exclusively, we lose information about translated nodes.
1024  * We need to permanently keep the translated nodes list.
1025  * For insert_nodes() we actually need antic_out. But antic_out is not usable,
1026  * because every value had to be cross-looked up in every available set.
1027  *
1028  * If we used antic_out, the translated nodes list would not be necessary
1029  * permanently, because instead of looking for translated(antic_in) we could
1030  * just use antic_out of the predecessor block.
1031  *
1032  * @param block  the block
1033  * @param ctx    the walker environment
1034  */
1035 static void compute_antic(ir_node *block, void *ctx)
1036 {
1037         pre_env                *env       = (pre_env*)ctx;
1038         block_info             *succ_info;
1039         block_info             *info;
1040         ir_node                *succ;
1041         ir_node                *value;
1042         ir_node                *expr;
1043         size_t                  size;
1044         ir_valueset_iterator_t  iter;
1045         int                     n_succ;
1046
1047         /* the walker visits all nodes, only blocks are of interest here */
1048         if (! is_Block(block))
1049                 return;
1050
1051         /* the end block has no successor */
1052         if (block == env->end_block)
1053                 return;
1054
1055         info = get_block_info(block);
1056         /* track changes */
1057         size = ir_valueset_size(info->antic_in);
1058         n_succ = get_Block_n_cfg_outs(block);
1059
1060         if (env->first_iter) {
1061 #if NO_INF_LOOPS
1062                 if (! is_in_infinite_loop(block)) {
1063                         foreach_valueset(info->exp_gen, value, expr, iter) {
1064                                 ir_valueset_insert(info->antic_in, value, expr);
1065                         }
1066                 }
1067 #else
1068                 foreach_valueset(info->exp_gen, value, expr, iter) {
1069                         ir_valueset_insert(info->antic_in, value, expr);
1070                 }
1071 #endif
1072         }
1073
1074         /* successor might have phi nodes */
1075         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1076                 succ      = get_Block_cfg_out(block, 0);
1077                 int pos   = get_Block_cfgpred_pos(succ, block);
1078                 succ_info = get_block_info(succ);
1079
1080                 /* initialize translated set */
1081                 if (env->first_iter) {
1082                         info->trans = XMALLOC(ir_nodehashmap_t);
1083                         ir_nodehashmap_init(info->trans);
1084                 }
1085
1086                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1087                         /* we translate the expression over the phi node,
1088                            remember() builds the value tree */
1089                         ir_node *trans       = phi_translate(expr, succ, pos, block);
1090                         /* create new value if necessary */
1091                         ir_node *trans_value = identify_or_remember(trans);
1092                         ir_node *represent;
1093
1094                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1095
1096                         /* on value change (phi present) we need the translated node
1097                            to represent the new value for possible further translation. */
1098                         if (value != trans_value)
1099                                 represent = trans;
1100                         else
1101                                 represent = expr;
1102
1103                         if (is_hoistable_above(expr, block, 1))
1104                                 ir_valueset_replace(info->antic_in, trans_value, represent);
1105                         set_translated(info->trans, expr, represent);
1106                 }
1107         } else if (n_succ > 1) {
1108                 int         i;
1109                 ir_node    *common     = NULL;
1110                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1111                 block_info *succ0_info = get_block_info(succ0);
1112
1113                 /* intersection of the successors' antic_in sets */
1114                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1115                         /* iterate over remaining successors */
1116                         for (i = 1; i < n_succ; ++i) {
1117                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1118                                 block_info *succ_info = get_block_info(succ);
1119
1120                                 /* value in antic_in? */
1121                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1122                                 if (common == NULL)
1123                                         break;
1124                         }
1125
1126                         if (common && is_hoistable_above(expr, block, 0)) {
1127                                 ir_valueset_insert(info->antic_in, value, expr);
1128                                 DB((dbg, LEVEL_3, "common and clean %+F(%+F) in %+F\n", expr, value, block));
1129                         }
1130                 }
1131         }
1132
1133         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1134
1135         if (size != ir_valueset_size(info->antic_in))
1136                 env->changes |= 1;
1137 }
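/*
 * Example of the one-representative problem sketched above (hypothetical):
 * two predecessors compute the same value v through completely different
 * expressions, and the chosen representative of v cannot be phi translated
 * because its own operands were never translated into this block.  In that
 * case the value is not added to antic_in rather than being propagated
 * with a wrong representative.
 */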
1138
1139 /* --------------------------------------------------------
1140  * Main algorithm Avail_out
1141  * --------------------------------------------------------
1142  */
1143
1144 /**
1145  * Computes Avail_out(block):
1146  *
1147  * Avail_in(block)  = Avail_out(dom(block))
1148  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1149  *
1150  * Precondition:
1151  *  This function must be called in the top-down topological order:
1152  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1153  *
1154  * @param block   the block
1155  * @param ctx     walker context
1156  */
1157 static void compute_avail_top_down(ir_node *block, void *ctx)
1158 {
1159         pre_env    *env   = (pre_env*)ctx;
1160         block_info *info;
1161
1162         if (block == env->end_block)
1163                 return;
1164
1165         info  = get_block_info(block);
1166
1167         /* Add all nodes from the immediate dominator.
1168            This ensures that avail_out contains the leader. */
1169         if (block != env->start_block) {
1170                 ir_node                *dom_block = get_Block_idom(block);
1171                 block_info             *dom_info  = get_block_info(dom_block);
1172                 ir_node                *value;
1173                 ir_node                *expr;
1174                 ir_valueset_iterator_t  iter;
1175
1176                 foreach_valueset(dom_info->avail_out, value, expr, iter) {
1177                         /* use first available expr as leader */
1178                         ir_valueset_replace(info->avail_out, value, expr);
1179                 }
1180         }
1181
1182         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1183 }
1184
1185 /* --------------------------------------------------------
1186  * Main algorithm redundancy detection
1187  * --------------------------------------------------------
1188  */
1189
1190 /**
1191  * Flags node irn redundant depending on redundant parameter.
1192  */
1193 static void flag_redundant(ir_node *irn, unsigned redundant)
1194 {
1195         if (redundant) {
1196                 set_irn_link(irn, irn);
1197         } else {
1198                 set_irn_link(irn, NULL);
1199         }
1200 }
1201
1202 /**
1203  * Returns redundant flag of node irn.
1204  */
1205 static unsigned is_redundant(ir_node *irn)
1206 {
1207         return (get_irn_link(irn) != NULL);
1208 }
1209
1210 /**
1211  * Returns a valid mode if the value of expr is a partially redundant value.
1212  *
1213  * @param block   the block
1214  * @param expr    the expression
1215  *
1216  * @return mode of the expression if it is partially redundant else NULL
1217  */
1218 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1219 {
1220         ir_node *first_avail         = NULL;
1221         int      pos;
1222         int      arity               = get_irn_arity(block);
1223         int      fully_redundant     = 1;
1224         int      partially_redundant = 0;
1225         ir_mode *mode                = NULL;
1226
1227         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1228
1229         /* for each predecessor blocks */
1230         for (pos = 0; pos < arity; ++pos) {
1231                 block_info *pred_info;
1232                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1233                 ir_node    *trans_expr;
1234                 ir_node    *trans_value;
1235                 ir_node    *avail_expr;
1236
1237                 /* ignore bad blocks. */
1238                 if (is_Bad(pred_block))
1239                         continue;
1240
1241                 pred_info  = get_block_info(pred_block);
1242                 trans_expr = get_translated(pred_block, expr);
1243                 trans_value = identify(trans_expr);
1244
1245                 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1246                 /* value might be available through a not yet existing constant */
1247                 if (avail_expr == NULL && is_irn_constlike(trans_expr)) {
1248                         /* limit range of new constants */
1249                         ir_mode   *cmode  = get_irn_mode(trans_expr);
1250                         ir_tarval *upper = new_tarval_from_long(127, cmode);
1251                         ir_tarval *lower = new_tarval_from_long(-127, cmode);
1252                         ir_tarval *c     = get_Const_tarval(trans_expr);
1253
1254                         if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
1255                                 tarval_cmp(c, upper) != ir_relation_greater_equal) {
1256                                 avail_expr = NULL;
1257                         } else {
1258                                 avail_expr = trans_expr;
1259                         }
1260             }
1261
1262                 DB((dbg, LEVEL_3, "avail_expr %+F\n", avail_expr));
1263
1264                 if (avail_expr == NULL) {
1265                         pred_info->avail = trans_expr;
1266                         pred_info->found = 0;
1267                         fully_redundant  = 0;
1268                 } else {
1269                         /* expr is available */
1270                         pred_info->avail    = avail_expr;
1271                         pred_info->found    = 1;
1272                         mode                = get_irn_mode(avail_expr);
1273                         partially_redundant = 1;
1274
1275                         if (first_avail == NULL)
1276                                 first_avail = avail_expr;
1277                         else if (first_avail != avail_expr)
1278                                 /* Multiple different expressions are available */
1279                                 fully_redundant = 0;
1280
1281                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1282                 }
1283         }
1284
1285 #if BETTER_GREED
1286         /* value is redundant from last iteration,
1287            but has not been removed from antic_in (is not optimized) */
1288         if (! environ->first_iter && is_redundant(expr))
1289                 return mode;
1290 #endif
1291
1292         /* If it is not the same value already existing along every predecessor
1293            and it is defined by some predecessor then it is partially redundant. */
1294         if (! partially_redundant || fully_redundant)
1295                 return NULL;
1296         return mode;
1297 }
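/*
 * Illustrative decision for a join block with two predecessors: if the
 * value of expr is in avail_out of one predecessor but not of the other,
 * it is partially redundant and its mode is returned, so the caller can
 * insert the missing computation and a Phi.  If the same expression is
 * available in every predecessor, the value is fully redundant and NULL
 * is returned because nothing needs to be inserted here.
 */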
1298
1299 /**
1300  * Updates the new_set of a block by adding the new_set of
1301  * the immediate dominating block.
1302  * @param block  the block
1303  * @param idom   the immediate dominator of block
1304  */
1305 static void update_new_set(ir_node *block, ir_node *idom)
1306 {
1307         ir_node                *value;
1308         ir_node                *expr;
1309         ir_valueset_iterator_t  iter;
1310         block_info             *curr_info = get_block_info(block);
1311         block_info             *idom_info = get_block_info(idom);
1312         int                     updated   = 0;
1313
1314         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1315         foreach_valueset(idom_info->new_set, value, expr, iter) {
1316                 /* inherit new_set from immediate dominator */
1317                 ir_valueset_insert(curr_info->new_set, value, expr);
1318                 /* replace in avail_out */
1319                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1320         }
1321 #ifdef DEBUG_libfirm
1322         if (updated)
1323                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1324 #endif
1325 } /* update_new_set */
1326
1327 /**
1328  * Checks if hoisting irn is greedy.
1329  * Greedy hoisting means that nodes which are not even partially redundant
1330  * would be hoisted as well. This happens if a partially redundant node has
1331  * non-redundant predecessors.
1332  */
1333 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1334 {
1335         int arity = get_irn_arity(irn);
1336         int i;
1337
1338         for (i = 0; i < arity; ++i) {
1339                 ir_node *pred  = get_irn_n(irn, i);
1340
1341                 /* predecessor is on the current path, so we have to ensure its redundancy */
1342                 if (! block_strictly_dominates(get_nodes_block(pred), block) && ! is_redundant(pred))
1343                         return 1;
1344         }
1345         return 0;
1346 }
1347
1348 /**
1349  * Perform insertion of partially redundant values.
1350  * For every block node, do the following:
1351  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1352  * If the block has multiple predecessors,
1353  *     2a. Iterate over the ANTIC expressions for the block to see if
1354  *         any of them are partially redundant.
1355  *     2b. If so, insert them into the necessary predecessors to make
1356  *         the expression fully redundant.
1357  *     2c. Insert a new Phi merging the values of the predecessors.
1358  *     2d. Insert the new Phi, and the new expressions, into the
1359  *         NEW_SETS set.
1360  *
1361  * @param block  the block
1362  * @param ctx    the walker environment
1363  */
1364 static void insert_nodes(ir_node *block, void *ctx)
1365 {
1366         pre_env                *env    = (pre_env*)ctx;
1367         int                     arity  = get_irn_arity(block);
1368         ir_node                *value;
1369         ir_node                *expr;
1370         block_info             *info;
1371         ir_node                *idom;
1372         int                     pos;
1373         ir_valueset_iterator_t  iter;
1374
1375         /* only blocks */
1376         if (! is_Block(block))
1377                 return;
1378
1379         /* ensure that even the start block has a new_set */
1380         info = get_block_info(block);
1381         if (info->new_set)
1382                 ir_valueset_del(info->new_set);
1383         info->new_set = ir_valueset_new(16);
1384
1385         if (block == env->start_block)
1386                 return;
1387
1388         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1389
1390         idom = get_Block_idom(block);
1391         update_new_set(block, idom);
1392
1393         /* process only path joining blocks */
1394         if (arity < 2) {
1395                 return;
1396         }
1397
1398 #if BETTER_GREED
1399         plist_t *stack = plist_new();
1400 #endif
1401
1402         /* This is the main reason we choose to use antic_in over antic_out;
1403            we may iterate over every anticipated value first and not
1404            over the predecessor blocks. */
1405         foreach_valueset(info->antic_in, value, expr, iter) {
1406                 ir_mode  *mode;
1407                 ir_node  *phi;
1408                 ir_node **phi_in;
1409
1410                 /* already done? */
1411                 if (ir_valueset_lookup(info->antic_done, value))
1412                         continue;
1413
1414 #if BETTER_GREED
1415                 plist_insert_front(stack, expr);
1416 #endif
1417
1418                 /* filter phi nodes from antic_in */
1419                 if (is_Phi(expr)) {
1420                         flag_redundant(expr, 1);
1421                         continue;
1422                 }
1423
1424                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1425
1426                 /* A value computed in the dominator is totally redundant.
1427                    Hence we have nothing to insert. */
1428                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1429                         flag_redundant(expr, 1);
1430                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1431                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1432
1433                         continue;
1434                 }
1435
1436 #if !BETTER_GREED
1437                 if (is_hoisting_greedy(expr, block))
1438                         continue;
1439 #endif
1440
1441                 mode = is_partially_redundant(block, expr, value);
1442                 if (mode == NULL) {
1443                         flag_redundant(expr, 0);
1444                         continue;
1445                 } else {
1446                         flag_redundant(expr, 1);
1447                 }
1448
1449 #if BETTER_GREED
1450                 if (is_hoisting_greedy(expr, block))
1451                         continue;
1452 #endif
1453
1454 #if LOADS || DIVMODS
1455                 /* save old mode_M phis to remove keepalive edges later */
1456                 if (is_memop(expr)) {
1457                         ir_node *mem = get_memop_mem(expr);
1458                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1459                                 ir_nodeset_insert(env->keeps, mem);
1460                         }
1461                 }
1462 #endif
1463
1464 #ifdef DEBUG_libfirm
1465                 if (! is_Proj(expr)) {
1466                         if (env->first_iter)
1467                                 inc_stats(gvnpre_stats->first_iter_found);
1468                         inc_stats(gvnpre_stats->partially);
1469                 }
1470                 if (is_Load(expr))
1471                         inc_stats(gvnpre_stats->loads);
1472                 else if (is_Div(expr) || is_Mod(expr))
1473                         inc_stats(gvnpre_stats->divmods);
1474 #endif
1475
1476                 phi_in = XMALLOCN(ir_node *, arity);
1477
1478                 /* for each predecessor block */
1479                 for (pos = 0; pos < arity; ++pos) {
1480                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1481                         block_info *pred_info;
1482
1483                         /* ignore bad blocks. */
1484                         if (is_Bad(pred_block)) {
1485                                 ir_graph *irg = get_irn_irg(pred_block);
1486                                 phi_in[pos] = new_r_Bad(irg, mode);
1487                                 continue;
1488                         }
1489                         pred_info = get_block_info(pred_block);
1490
1491                         if (! pred_info->found) {
1492                                 ir_node *trans = get_translated(pred_block, expr);
1493
1494                                 assert(trans);
1495                                 if (trans == expr) {
1496                         /* Not phi translated yet: expressions are translated
1497                            eagerly only if an ancestor contains a Phi; nodes without
1498                            such an ancestor can still be redundant and are translated here. */
1499                                         trans = phi_translate(expr, block, pos, pred_block);
1500                                         set_translated(pred_info->trans, expr, trans);
1501                                 }
1502
1503                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F not available\n", trans, pred_block, expr));
1504
1505                                 /* value is now available in target block through trans
1506                                    insert (not replace) because it has not been available */
1507                                 ir_valueset_insert(pred_info->avail_out, value, trans);
1508                                 phi_in[pos] = trans;
1509                         } else {
1510                                 /* value available */
1511                                 phi_in[pos] = pred_info->avail;
1512                         }
1513                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1514                 }
1515
1516                 /* We do not connect tuples as they will be connected automatically
1517                    by the corresponding projections. */
1518                 if (get_irn_mode(expr) != mode_T) {
1519
1520                         phi = new_r_Phi(block, arity, phi_in, mode);
1521                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1522
1523                         /* this value is now available through the new phi */
1524                         ir_valueset_replace(info->avail_out, value, phi);
1525                         ir_valueset_insert(info->new_set, value, phi);
1526                 }
1527                 free(phi_in);
1528
1529 #if 0
1530                 /* remove from antic_in, because expr is not anticipated
1531                    anymore in this block */
1532                 ir_valueset_remove_iterator(info->antic_in, &iter);
1533 #endif
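                     /* Remember the handled value in antic_done so that further insert
                        iterations skip it and hoist_high can process it afterwards. */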
1534                 ir_valueset_insert(info->antic_done, value, expr);
1535
1536                 env->changes |= 1;
1537         }
1538
1539 #if BETTER_GREED
1540         if (env->changes) {
1541                 plist_element_t *it;
1542                 /* iterate in inverse topological order */
1543                 foreach_plist(stack, it) {
1544                         ir_node *irn       = (ir_node *)plist_element_get_value(it);
1545                         ir_node *block     = get_nodes_block(irn);
1546                         int      j;
1547                         char     redundant = 1;
1548
1549                         /* does irn only have redundant successors? */
1550
1551                         if (! get_irn_outs_computed(irn))
1552                                 continue;
1553
1554                         for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
1555                                 ir_node *succ = get_irn_out(irn, j);
1556
1557                                 /* if succ and irn are in the same block */
1558                                 if (get_nodes_block(succ) == block && is_redundant(succ)) {
1559                                         continue;
1560                                 } else {
1561                                         redundant = 0;
1562                                         break;
1563                                 }
1564                         }
1565
1566                         if (redundant)
1567                                 flag_redundant(irn, 1);
1568                 }
1569         }
1570         plist_free(stack);
1571 #endif
1572
1573 }
1574
1575 #if HOIST_HIGH
1576 /**
1577  * Dom tree block walker to move nodes made available by insert_nodes()
1578  * into the highest possible block where their value is still
1579  * anticipated and their operands have no other users on the path.
1580  */
1581 static void hoist_high(ir_node *block, void *ctx)
1582 {
1583         pre_env                *env        = (pre_env*)ctx;
1584         block_info             *curr_info;
1585         ir_valueset_iterator_t  iter;
1586         ir_node                *expr;
1587         ir_node                *value;
1588         int                     arity      = get_irn_arity(block);
1589
1590         if (! is_Block(block))
1591                 return;
1592
1593         if (arity < 2)
1594                 return;
1595
1596         if (block == env->start_block)
1597                 return;
1598
1599         curr_info = get_block_info(block);
1600
1601         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1602
1603         /* foreach entry optimized by insert_nodes */
1604         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1605                 int pos;
1606
1607                 if (is_memop(expr) || is_Proj(expr))
1608                         continue;
1609
1610                 /* visit hoisted expressions */
1611                 for (pos = 0; pos < arity; ++pos) {
1612                         /* standard target is predecessor block */
1613                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1614                         block_info *pred_info  = get_block_info(target);
1615                         ir_node    *avail;
1616                         ir_node    *temp_target;
1617                         ir_node    *new_target;
1618                         ir_node    *trans_expr;
1619                         ir_node    *trans_value;
1620                         ir_node    *dom;
1621                         int         avail_arity;
1622                         int         i;
1623                         unsigned    nest_depth;
1624
1625                         /* get phi translated value */
1626                         trans_expr  = get_translated(target, expr);
1627                         trans_value = identify(trans_expr);
1628                         avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1629
1630                         /* get the used expr on this path */
1631                         if (avail == NULL)
1632                                 avail = trans_expr;
1633
1634                         value = identify(avail);
1635
1636                         /* anticipation border */
1637                         new_target  = NULL;
1638                         temp_target = NULL;
1639                         nest_depth  = get_loop_depth(get_irn_loop(target));
1640                         dom         = target;
1641
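                             /* Walk up the dominator tree; the highest dominator in which
                                the value is still anticipated (without entering a deeper
                                loop) becomes the candidate target block. */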
1642                         while (dom != environ->start_block) {
1643                                 block_info *dom_info;
1644
1645                                 dom = get_Block_idom(dom);
1646                                 dom_info = get_block_info(dom);
1647
1648                                 if (is_Bad(dom))
1649                                         break;
1650
1651                                 /* check if the available node is still anticipated and clean
1652                                    (clean is part of antic) */
1653                                 /* the value may be hoisted above this block */
1654                                 if (! ir_valueset_lookup(dom_info->antic_in, value))
1655                                         break;
1656
1657                                 /* do not hoist into loops: accept dom only if it does not increase the nesting depth */
1658                                 if (get_loop_depth(get_irn_loop(dom)) <= nest_depth) {
1659                                         new_target = dom;
1660                                         nest_depth = get_loop_depth(get_irn_loop(dom));
1661                                 }
1662
1663                                 temp_target = dom;
1664                         }
1665
1666                         /* Nothing to do if there is no new target or the available node already dominates it. */
1667                         if (new_target) {
1668                                 DB((dbg, LEVEL_2, "leader block %+F\n", get_nodes_block(avail)));
1669                                 /* already in the same block or dominating? */
1670                                 if (block_dominates(get_nodes_block(avail), new_target)) {
1671                                         DB((dbg, LEVEL_4, "fail: antic border %+F\n", block));
1672                                         new_target = NULL;
1673                                 }
1674                         }
1675
1676                         DB((dbg, LEVEL_2, "antic border %+F\n", new_target));
1677
1678                         avail_arity = get_irn_arity(avail);
1679                         /* check for other uses of the operands of avail on the current path */
1680                         for (i = 0; i < avail_arity; i++) {
1681                                 ir_node *pred       = get_irn_n(avail, i);
1682                                 //ir_node *pred_block = get_nodes_block(avail);
1683                                 int      j;
1684
1685                                 if (new_target == NULL)
1686                                         break;
1687
1688                                 /* TODO this might be a problem as we mainly operate on new nodes */
1689                                 if (! get_irn_outs_computed(pred)) {
1690                                         new_target = NULL;
1691                                         break;
1692                                 }
1693
1694                                 /**/
1695 #if 0
1696                                 if (! block_dominates(pred_block, new_target)) {
1697                                         new_target = pred_block;
1698                                 }
1699 #endif
1700
1701                                 /* check for every successor */
1702                                 for (j = get_irn_n_outs(pred) - 1; j >= 0; --j) {
1703                                         ir_node *succ = get_irn_out(pred, j);
1704
1705                                         /* is succ on this path? */
1706                                         if (block_dominates(new_target, get_nodes_block(succ))) {
1707                                                 ir_node *succ_value = identify(succ);
1708
1709                                                 /* Do we have another user than avail?
1710                                                    Then predecessor is not dead after removal of avail. */
1711                                                 if (succ_value != value) {
1712                                                         DB((dbg, LEVEL_4, "fail: still used %+F\n", succ));
1713                                                         new_target = NULL;
1714                                                         break;
1715                                                 }
1716                                         }
1717                                 }
1718                         }
1719
1720                         /* only one usage on our path */
1721                         if (new_target) {
1722                                 /* push the available node up into the new target block */
1723                                 set_nodes_block(avail, new_target);
1724
1725                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1726                         }
1727                 }
1728         }
1729 }
1730 #endif
1731
1732 /* --------------------------------------------------------
1733  * Elimination of fully redundant nodes
1734  * --------------------------------------------------------
1735  */
1736
1737 /**
1738  * Walker which finds redundant nodes using avail_out sets
1739  * and exchanges them for existing ones.
1740  * We cannot change the graph here as this would affect
1741  * the hash values of the nodes.
1742  *
1743  * @param irn  the node
1744  * @param ctx  the walker environment
1745  */
1746 static void eliminate(ir_node *irn, void *ctx)
1747 {
1748         pre_env *env = (pre_env*)ctx;
1749
1750         if (! is_Block(irn)) {
1751                 ir_node    *block = get_nodes_block(irn);
1752                 block_info *info  = get_block_info(block);
1753                 ir_node    *value = identify(irn);
1754
1755                 if (value != NULL) {
1756                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1757
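                             /* irn is redundant if its value already has a leader in
                                avail_out; leaders created during the insert phase (index
                                >= last_idx) mark partial, older ones full redundancy. */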
1758                         if (expr != NULL && expr != irn) {
1759                                 elim_pair *p = OALLOC(env->obst, elim_pair);
1760
1761                                 p->old_node = irn;
1762                                 p->new_node = expr;
1763                                 p->next     = env->pairs;
1764                                 if (get_irn_idx(expr) >= env->last_idx)
1765                                         p->reason = FS_OPT_GVN_PARTLY;
1766                                 else
1767                                         p->reason = FS_OPT_GVN_FULLY;
1768                                 env->pairs = p;
1769                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1770                         }
1771                 }
1772         }
1773 }  /* eliminate */
1774
1775 /**
1776  * Do all the recorded changes and optimize
1777  * newly created Phi's.
1778  *
1779  * @param pairs  list of elimination pairs
1780  */
1781 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1782 {
1783         elim_pair             *p;
1784         ir_nodeset_iterator_t iter;
1785         ir_node               *m_phi;
1786         ir_node               *end = environ->end_node;
1787
1788         for (p = pairs; p != NULL; p = p->next) {
1789                 /* might be already changed */
1790                 p->new_node = skip_Id(p->new_node);
1791
1792                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1793
1794                 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1795                  * which we can optimize here */
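                     /* e.g. if p->new_node is Phi(old_node, x, old_node) it collapses
                        to x, provided x is the only operand besides old_node. */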
1796                 if (is_Phi(p->new_node)) {
1797                         int      i;
1798                         ir_node *res = NULL;
1799
1800                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1801                                 ir_node *pred = get_irn_n(p->new_node, i);
1802
1803                                 if (pred != p->old_node) {
1804                                         if (res) {
1805                                                 res = NULL;
1806                                                 break;
1807                                         }
1808                                         res = pred;
1809                                 }
1810                         }
1811                         if (res) {
1812                                 exchange(p->new_node, res);
1813                                 p->new_node = res;
1814                         }
1815                 }
1816                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1817
1818                 exchange(p->old_node, p->new_node);
1819         }
1820
1821         /* remove keep alive edges of unused mode_M phis */
1822         foreach_ir_nodeset(keeps, m_phi, iter) {
1823                 remove_End_keepalive(end, m_phi);
1824         }
1825 }  /* eliminate_nodes */
1826
1827 /* --------------------------------------------------------
1828  * GVN PRE pass
1829  * --------------------------------------------------------
1830  */
1831
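/* A rough illustration (not taken from any test case, names invented):
 * for a partially redundant expression such as
 *
 *     if (c) { x = a + b; }
 *     y = a + b;
 *
 * the insert phase makes a + b available on the path where it is missing
 * and merges the results with a Phi, so the eliminate phase can replace
 * the second computation:
 *
 *     if (c) { t1 = a + b; x = t1; } else { t2 = a + b; }
 *     y = Phi(t1, t2);
 */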
1832 /**
1833  * Gvn_Pre algorithm.
1834  *
1835  * @param irg   the graph
1836  */
1837 static void gvn_pre(ir_graph *irg, pre_env *env)
1838 {
1839         unsigned              antic_iter;
1840         unsigned              insert_iter;
1841
1842         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1843
1844         /* allocate block info */
1845         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1846
1847         ir_nodehashmap_init(&value_map);
1848
1849         /* generate exp_gen */
1850         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1851         dump_all_expgen_sets(env->list);
1852
1853         /* compute the avail_out sets for all blocks */
1854         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1855
1856         /* compute the anticipated value sets for all blocks */
1857         antic_iter      = 0;
1858         env->first_iter = 1;
1859
1860         /* antic_in passes */
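         /* Iterate the backward flow analysis until no antic_in set changes
            any more, capped at MAX_ANTIC_ITER passes. */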
1861         do {
1862                 ++antic_iter;
1863                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1864                 env->changes = 0;
1865                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1866                 env->first_iter = 0;
1867                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1868         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1869
1870         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1871
1872         ir_nodeset_init(env->keeps);
1873         insert_iter       = 0;
1874         env->insert_phase = 1;
1875         env->first_iter   = 1;
1876         env->last_idx     = get_irg_last_idx(irg);
1877         /* compute redundant expressions */
1878         do {
1879                 ++insert_iter;
1880                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1881                 env->changes = 0;
1882                 /* TODO topologically top down would be better; fewer iterations. */
1883                 dom_tree_walk_irg(irg, insert_nodes, NULL, env);
1884                 env->first_iter = 0;
1885                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1886         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1887         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1888
1889 #if HOIST_HIGH
1890         dom_tree_walk_irg(irg, hoist_high, NULL, env);
1891 #endif
1892
1893         /* last step: eliminate nodes */
1894         irg_walk_graph(irg, NULL, eliminate, env);
1895         eliminate_nodes(env->pairs, env->keeps);
1896
1897         ir_nodeset_destroy(env->keeps);
1898 }
1899
1900 /**
1901  * Gvn_Pre pass for graph irg.
1902  *
1903  * @param irg   the graph
1904  */
1905 void do_gvn_pre(ir_graph *irg)
1906 {
1907         struct obstack        obst;
1908         pre_env               env;
1909         ir_nodeset_t          keeps;
1910         optimization_state_t  state;
1911         block_info           *block_info;
1912
1913         /* Bads and unreachable code cause too much trouble with dominance;
1914            consistent dominance is needed for the dominator tree walks,
1915            loop info is needed for endless loop detection, and
1916            the absence of critical edges is a GVN-PRE precondition.
1917          */
1918         assure_irg_properties(irg,
1919                 IR_GRAPH_PROPERTY_NO_BADS
1920                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
1921                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
1922                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1923                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1924                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
1925
1926         /* register a debug mask */
1927         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1928
1929         save_optimization_state(&state);
1930         environ = &env;
1931
1932         /* Edges will crash if enabled because of our trick of allocating on another obstack. */
1933         edges_deactivate(irg);
1934         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1935
1936         DEBUG_ONLY(init_stats();)
1937
1938         /* setup environment */
1939         obstack_init(&obst);
1940         env.graph        = irg;
1941         env.obst         = &obst;
1942         env.list         = NULL;
1943         env.start_block  = get_irg_start_block(irg);
1944         env.end_block    = get_irg_end_block(irg);
1945         env.end_node     = get_irg_end(irg);
1946         env.pairs        = NULL;
1947         env.keeps        = &keeps;
1948         env.insert_phase = 0;
1949
1950         /* Detect and set links of infinite loops to non-zero. */
1951         analyse_loops(irg);
1952
1953         /* Switch on GCSE. We need it to correctly compute
1954            the value of a node, which is independent from
1955            its block. */
1956         set_opt_global_cse(1);
1957         /* replace the value table normally built by new_identities() */
1958         if (irg->value_table != NULL)
1959                 del_pset(irg->value_table);
1960         /* the pset starts with an assumed capacity of 512 nodes */
1961         irg->value_table = new_pset(compare_gvn_identities, 512);
1962
1963         /* do GVN-PRE pass */
1964         gvn_pre(irg, &env);
1965         DEBUG_ONLY(print_stats();)
1966
1967         /* clean up: delete all sets */
1968         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
1969                 free_block_info(block_info);
1970         }
1971
1972         DEBUG_ONLY(free_stats();)
1973         ir_nodehashmap_destroy(&value_map);
1974         obstack_free(&obst, NULL);
1975         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1976
1977         /* Pin the graph again.
1978            This is needed due to the use of set_opt_global_cse(1) */
1979         set_irg_pinned(irg, op_pin_state_pinned);
1980         restore_optimization_state(&state);
1981         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
1982
1983         /* TODO there are optimizations that try to use the existing value_table */
1984         new_identities(irg);
1985 }
1986
1987 /* Creates an ir_graph pass for do_gvn_pre. */
1988 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
1989 {
1990         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
1991 }