GVN fixes
[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Global Value Numbering Partial Redundancy Elimination
23  *          (VanDrunen Hosking 2004)
24  * @author  Michael Beck
25  *
26  */
27 #include "config.h"
28
29 #include "debug.h"
30 #include "ircons.h"
31 #include "irdom.h"
32 #include "iredges.h"
33 #include "irflag.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
41 #include "irouts.h"
42 #include "irpass.h"
43 #include "valueset.h"
44 #include "irloop.h"
45
46 #include "irgraph_t.h"
47 #include "irnode_t.h"
48 #include "iropt_t.h"
49 #include "plist.h"
50
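/* Upper bounds for the antic_in and insert-node fixed-point iterations. */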
51 #define MAX_ANTIC_ITER 10
52 #define MAX_INSERT_ITER 3
53
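/* Compile-time switches for optional features: hoisting into higher blocks,
   an alternative greediness check, PRE of Load and Div/Mod nodes, and
   special handling of infinite loops. */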
54 #define HOIST_HIGH 0
55 #define BETTER_GREED 0
56 #define LOADS 1
57 #define DIVMODS 0
58 #define NO_INF_LOOPS 0
59
60 /** Additional info we need for every block. */
61 typedef struct block_info {
62         ir_valueset_t     *exp_gen;    /* contains this block's clean expressions */
63         ir_valueset_t     *avail_out;  /* available values at block end */
64         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
65         ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert nodes phase */
66         ir_valueset_t     *new_set;    /* values newly made available by hoisting */
67         ir_nodehashmap_t  *trans;      /* maps nodes to their translations into this block */
68         ir_node           *avail;      /* saves available node for insert node phase */
69         int                found;      /* saves kind of availability for insert_node phase */
70         ir_node           *block;      /* block of the block_info */
71         struct block_info *next;       /* links all instances for easy access */
72 } block_info;
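/* The block_info of a block is attached to it through the block's link field,
   see alloc_block_info() and get_block_info(). */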
73
74 /**
75  * A pair of nodes to be exchanged.
76  * We have to defer the exchange because our hash-sets cannot
77  * find an already replaced node.
78  */
79 typedef struct elim_pair {
80         ir_node *old_node;      /* node that will be replaced */
81         ir_node *new_node;      /* replacement for old_node */
82         struct elim_pair *next; /* links all instances for easy access */
83         int     reason;         /* reason for the replacement */
84 } elim_pair;
85
86 /** environment for the GVN-PRE algorithm */
87 typedef struct pre_env {
88         ir_graph       *graph;        /* current graph */
89         struct obstack *obst;         /* obstack to allocate on */
90         ir_node        *start_block;  /* start block of the current graph */
91         ir_node        *end_block;    /* end block of the current graph */
92         ir_node        *end_node;     /* end node of the current graph */
93         block_info     *list;         /* block_info list head */
94         elim_pair      *pairs;        /* elim_pair list head */
95         ir_nodeset_t   *keeps;        /* phis to be removed, whose keep-alive edges have to be killed */
96         unsigned        last_idx;     /* last node index of input graph */
97         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
98         char            first_iter;   /* non-zero for first fixed point iteration */
99         char            insert_phase; /* non-zero while in the insert-nodes phase */
100 } pre_env;
101
102 static pre_env *environ;
103 /* custom GVN value map */
104 static ir_nodehashmap_t value_map;
105
106 /* debug module handle */
107 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
108
109 #ifdef DEBUG_libfirm
110
111 /* --------------------------------------------------------
112  * Statistics
113  * --------------------------------------------------------
114  */
115
116 typedef struct gvnpre_statistics {
117         int replaced;
118         int partially;
119         int fully;
120         int loads;
121         int divmods;
122         int hoist_high;
123         int first_iter_found;
124         int antic_iterations;
125         int insert_iterations;
126         int infinite_loops;
127 } gvnpre_statistics;
128
129 gvnpre_statistics *gvnpre_stats = NULL;
130
131 static void init_stats()
132 {
133         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
134 }
135
136 static void free_stats()
137 {
138         free(gvnpre_stats);
139         gvnpre_stats = NULL;
140 }
141
142 static void print_stats()
143 {
144         gvnpre_statistics *stats = gvnpre_stats;
145         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
146         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
147         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
148         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
149         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
150         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
151         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
152         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
153         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
154         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
155 }
156
157 #define set_stats(var, value) (var)=(value)
158 #define inc_stats(var)        ((var)+=1)
159
160 /* --------------------------------------------------------
161  * Dump valuesets
162  * --------------------------------------------------------
163  */
164
165 /**
166  * Dump a value set.
167  *
168  * @param set    the set to dump
169  * @param txt    a text to describe the set
170  * @param block  the owner block of the set
171  */
172 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
173 {
174         ir_valueset_iterator_t iter;
175         ir_node *value, *expr;
176         int i;
177
178         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
179         i = 0;
180         foreach_valueset(set, value, expr, iter) {
181                 if ((i & 3) == 3)
182                         DB((dbg, LEVEL_2, "\n"));
183                 if (value != expr)
184                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
185                 else
186                         DB((dbg, LEVEL_2, " %+F,", expr));
187                 ++i;
188         }
189         DB((dbg, LEVEL_2, "\n}\n"));
190 }  /* dump_value_set */
191
192 /**
193  * Dump all exp_gen value sets.
194  *
195  * @param list  the list of block infos to retrieve the sets from
196  */
197 static void dump_all_expgen_sets(block_info *list)
198 {
199         block_info *block_info;
200
201         for (block_info = list; block_info != NULL; block_info = block_info->next) {
202                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
203         }
204 }
205
206 #else
207
208 #define dump_all_expgen_sets(list)
209 #define dump_value_set(set, txt, block)
210
211 #endif /* DEBUG_libfirm */
212
213 /* --------------------------------------------------------
214  * GVN Functions
215  * --------------------------------------------------------
216  */
217
218 /**
219  * Compares node collisions in valuetable.
220  * Modified identities_cmp().
221  */
222 static int compare_gvn_identities(const void *elt, const void *key)
223 {
224         ir_node *a = (ir_node *)elt;
225         ir_node *b = (ir_node *)key;
226         int i, irn_arity_a;
227
228         if (a == b) return 0;
229
230         /* phi nodes kill predecessor values and are always different */
231         if (is_Phi(a) || is_Phi(b))
232                 return 1;
233
234         /* memops are not the same, even if we want to optimize them
235            we have to take the order into account */
236         if (is_memop(a) || is_memop(b))
237                 return 1;
238
239 #if 0
240         if (is_Call(a) || is_Call(b))
241                 return 1;
242 #endif
243
244         if ((get_irn_op(a) != get_irn_op(b)) ||
245             (get_irn_mode(a) != get_irn_mode(b))) return 1;
246
247         /* compare if a's in and b's in are of equal length */
248         irn_arity_a = get_irn_arity(a);
249         if (irn_arity_a != get_irn_arity(b))
250                 return 1;
251
252         /* blocks are never the same */
253         if (is_Block(a) || is_Block(b))
254                 return 1;
255
256         /* TODO depends on load optimization */
257         if (get_irn_pinned(a) == op_pin_state_pinned) {
258                 /* for pinned nodes, the block inputs must be equal */
259                 if (get_irn_n(a, -1) != get_irn_n(b, -1))
260                         return 1;
261         } else {
262                 /* we need global values independently from their blocks */
263                 assert(get_opt_global_cse());
264         }
265
266         /* compare a->in[0..ins] with b->in[0..ins] */
267         for (i = 0; i < irn_arity_a; ++i) {
268                 ir_node *pred_a = get_irn_n(a, i);
269                 ir_node *pred_b = get_irn_n(b, i);
270                 if (pred_a != pred_b) {
271                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
272                                 return 1;
273                 }
274         }
275
276         /*
277          * here, we already know that the nodes are identical except for their
278          * attributes
279          */
280         if (a->op->ops.node_cmp_attr)
281                 return a->op->ops.node_cmp_attr(a, b);
282
283         return 0;
284 }
285
286 /**
287  * Identify does a lookup in the GVN valuetable.
288  * To be used when no new GVN values are to be created.
289  *
290  * @param irn  a node representing an expression
291  * @return a node representing the value
292  */
293 static ir_node *identify(ir_node *irn)
294 {
295         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
296         if (value)
297                 return value;
298         /* irn represents a new value, so return the leader */
299         return identify_remember(irn);
300 }
301
302 /**
303  * remember() adds node irn to the GVN valuetable.
304  * Identify_remember only identifies values of nodes with the
305  * same predecessor nodes (not values). By creating a node from the predecessor
306  * values/leaders, a true valuetree is built. Phis kill their predecessor value,
307  * so no circular dependencies need to be resolved.
308  *
309  * TODO Improvement:
310  *      Maybe this could be implemented with a custom node hash that takes
311  *      phi nodes and true values (instead of predecessors) into account,
312  *      resulting in value numbers.
313  * TODO This unnecessarily also handles nodes like calls, which are never equal.
314  *
315  * @param irn  a node representing an expression
316  * @return     the value of the expression
317  */
318 static ir_node *remember(ir_node *irn)
319 {
320         int       arity   = get_irn_arity(irn);
321         int       i;
322         int       changed = 0;
323         ir_node **in      = XMALLOCN(ir_node *, arity);
324         ir_node  *value;
325
326         for (i = 0; i < arity; ++i) {
327                 ir_node *pred       = get_irn_n(irn, i);
328                 /* value and leader at the same time */
329                 ir_node *pred_value = identify(pred);
330
331                 /* phi will be translated anyway, so kill the predecessor values
332                    this also prevents circular dependencies */
333                 if (is_Phi(pred)) {
334                         /* every phi represents its own value */
335                         in[i] = pred;
336                         continue;
337                 }
338
339                 /* predecessor is not its value representation/the leader */
340                 if (pred != pred_value)
341                         changed = 1;
342                 in[i] = pred_value;
343         }
344
345         if (changed) {
346                 /* create a representative whose predecessors are the value leaders */
347                 ir_node *nn = new_ir_node(
348                         get_irn_dbg_info(irn),
349                         get_irn_irg(irn),
350                         get_nodes_block(irn),
351                         get_irn_op(irn),
352                         get_irn_mode(irn),
353                         get_irn_arity(irn),
354                         in);
355                 copy_node_attr(environ->graph, irn, nn);
356
357                 /* now the value can be determined because the
358                    predecessors are the leaders */
359                 value = identify_remember(nn);
360         } else {
361                 value = identify_remember(irn);
362         }
363         free(in);
364
365         DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
366         ir_nodehashmap_insert(&value_map, irn, value);
367
368         return value;
369 }
370
371 /** When the value map has been built we may lookup expressions
372  *  and remember them if new.
373  */
374 static ir_node *identify_or_remember(ir_node *irn)
375 {
376         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
377         if (value)
378                 return value;
379         else
380                 return remember(irn);
381 }
382
383 /* --------------------------------------------------------
384  * Block info
385  * --------------------------------------------------------
386  */
387
388 /**
389  * Allocate block info for block block.
390  *
391  * @param block   the block
392  * @param env     the environment
393  */
394 static void alloc_block_info(ir_node *block, pre_env *env)
395 {
396         block_info *info = OALLOC(env->obst, block_info);
397
398         set_irn_link(block, info);
399         info->exp_gen    = ir_valueset_new(16);
400         info->avail_out  = ir_valueset_new(16);
401         info->antic_in   = ir_valueset_new(16);
402         info->antic_done = ir_valueset_new(16);
403         info->trans = XMALLOC(ir_nodehashmap_t);
404         ir_nodehashmap_init(info->trans);
405
406         info->new_set = NULL;
407         info->avail   = NULL;
408         info->block   = block;
409         info->found   = 1;
410
411         info->next = env->list;
412         env->list  = info;
413 }  /* alloc_block_info */
414
415 static void free_block_info(block_info *block_info)
416 {
417         ir_valueset_del(block_info->exp_gen);
418         ir_valueset_del(block_info->avail_out);
419         ir_valueset_del(block_info->antic_in);
        ir_valueset_del(block_info->antic_done);
420         if (block_info->trans) {
421                 ir_nodehashmap_destroy(block_info->trans);
422                 free(block_info->trans);
423         }
424         if (block_info->new_set)
425                 ir_valueset_del(block_info->new_set);
426 }
427
428 /**
429  * Bottom up walker that ensures that every block gets a block info.
430  *
431  * @param irn   the node
432  * @param ctx   the environment
433  */
434 static void block_info_walker(ir_node *irn, void *ctx)
435 {
436         if (is_Block(irn)) {
437                 pre_env *env = (pre_env*)ctx;
438                 alloc_block_info(irn, env);
439         }
440 }
441
442 /**
443  * Returns the block info of a block.
444  */
445 static block_info *get_block_info(ir_node *block)
446 {
447         return (block_info*)get_irn_link(block);
448 }
449
450 /* --------------------------------------------------------
451  * Infinite loop analysis
452  * --------------------------------------------------------
453  */
454
455 /**
456  * Walker to set block marks and loop links to 0.
457  */
458 static void clear_block_mark_loop_link(ir_node *block, void *env)
459 {
460         (void) env;
461
462         if (is_Block(block)) {
463                 set_Block_mark(block, 0);
464                 set_loop_link(get_irn_loop(block), NULL);
465         }
466 }
467
468 /**
469  * Returns non-zero if block is part of real loop loop.
470  */
471
472 static unsigned in_loop(ir_node *block, ir_loop *loop)
473 {
474         ir_loop *l     = get_irn_loop(block);
475         ir_loop *outer = get_irg_loop(environ->graph);
476
477         while (l != loop) {
478                 /* loop tree root is not a loop */
479                 if (l == NULL || l == outer)
480                         return 0;
481                 l = get_loop_outer_loop(l);
482         }
483         return 1;
484 }
485
486 /**
487  * Returns the outermost real loop of loop.
488  */
489 static ir_loop *get_loop_outermost(ir_loop *loop)
490 {
491         ir_loop *outer = get_irg_loop(environ->graph);
492         ir_loop *l     = loop;
493         ir_loop *last  = NULL;
494
495         while (l != outer) {
496                 last = l;
497                 l = get_loop_outer_loop(l);
498         }
499         return last;
500 }
501
502 /**
503  * Topological bottom-up walker that sets the loop links of infinite loops to non-zero.
504  * Block marks are used to flag blocks that are reachable from the end block;
505  * they are also set for blocks that cannot be part of an infinite loop.
506  */
507 static void infinite_loop_walker(ir_node *block, void *env)
508 {
509         int arity;
510         int i;
511         (void) env;
512
513         if (! is_Block(block))
514                 return;
515
516         /* start block has no predecessors */
517         if (block == environ->start_block)
518                 return;
519
520         arity = get_irn_arity(block);
521
522         /* block not part of a real loop: no infinite loop */
523         if (get_irn_loop(block) == get_irg_loop(environ->graph))
524                 set_Block_mark(block, 1);
525
526         if (get_Block_mark(block)) {
527                 /* reachable block: mark all cf predecessors */
528                 for (i = 0; i < arity; ++i) {
529                         ir_node *pred = get_Block_cfgpred_block(block, i);
530                         if (is_Bad(pred))
531                                 continue;
532                         set_Block_mark(pred, 1);
533                 }
534         } else {
535                 /* We are in a real loop and see an unreachable block. */
536                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
537
538                 /* flag loop as infinite */
539                 set_loop_link(outermost_loop, outermost_loop);
540                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
541
542                 /* The cf predecessors are unreachable, but can never be part of
543                    an infinite loop, because we just reached them. So we set the
544                    blockmark to prevent triggering the infinite loop detection. */
545
546                 /* passing information to the cf predecessors */
547                 for (i = 0; i < arity; ++i) {
548                         ir_node *pred = get_Block_cfgpred_block(block, i);
549
550                         if (is_Bad(pred))
551                                 continue;
552
553                         /* If our cf predecessor is in the same endless loop,
554                            it is also unreachable. */
555                         if (in_loop(pred, outermost_loop)) {
556                                 set_Block_mark(pred, 0);
557                         } else {
558                                 /* When we leave the unreachable loop, we artificially
559                                    declare the cf predecessor reachable. */
560                                 set_Block_mark(pred, 1);
561                         }
562                 }
563         }
564 }
565
566 /**
567  * Sets loop links of outermost infinite loops to non-zero.
568  */
569 static void analyse_loops(ir_graph *irg)
570 {
571         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
572
573         /* reset block mark and loop links */
574         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
575
576         /* mark end block reachable */
577         set_Block_mark(get_irg_end_block(irg), 1);
578         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
579
580         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
581 }
582
583 #if NO_INF_LOOPS
584 /**
585  * Returns non-zero if block is part of an infinite loop.
586  */
587 static unsigned is_in_infinite_loop(ir_node *block)
588 {
589         ir_loop *loop;
590
591         assert(is_Block(block));
592         loop = get_irn_loop(block);
593         assert(loop);
594
595         loop = get_loop_outermost(loop);
596         if (loop)
597                 return (get_loop_link(loop) != NULL);
598         else
599                 return 0;
600 }
601 #endif
602
603 /* --------------------------------------------------------
604  * GVN-PRE Exp_gen
605  * --------------------------------------------------------
606  */
607
608 /**
609  * Helper function to get the anti leader of node in block.
610  */
611 static ir_node *get_anti_leader(ir_node *node, ir_node *block)
612 {
613         block_info *info   = get_block_info(block);
614         ir_node    *value  = identify(node);
615         ir_node    *leader = ir_valueset_lookup(info->antic_in, value);
616
617         if (leader)
618                 return leader;
619         else
620                 return node;
621 }
622
623 /**
624  * Returns non-zero if a node is movable and a possible candidate for PRE.
625  */
626 static unsigned is_nice_value(ir_node *n)
627 {
628         ir_mode *mode = get_irn_mode(n);
629
630         /* positive group */
631
632         if (is_Phi(n))
633                 return 1;
634
635 #if LOADS || DIVMODS
636         if (is_Proj(n))
637                 return 1;
638 #else
639         if (is_Proj(n))
640                 return 0;
641 #endif
642
643 #if LOADS
644         if (is_Load(n))
645                 return (get_Load_volatility(n) == volatility_non_volatile);
646 #endif
647
648         /* negative group */
649
650         if (get_irn_pinned(n) == op_pin_state_pinned)
651                 return 0;
652
653         if (! mode_is_data(mode)) {
654                 if (! is_Div(n) && ! is_Mod(n))
655                         return 0;
656         }
657         return 1;
658 }
659
660 /**
661  * Checks if a node n is clean in block block for exp_gen.
662  *
663  * @param n      the node
664  * @param block  the block
665  * @return non-zero value for clean node
666  */
667 static unsigned is_clean_in_block_expgen(ir_node *n, ir_node *block)
668 {
669         int i, arity;
670
671         if (is_Phi(n))
672                 return 1;
673
674         if (! is_nice_value(n))
675                 return 0;
676
677         arity = get_irn_arity(n);
678         for (i = 0; i < arity; ++i) {
679                 ir_node *pred = get_irn_n(n, i);
680
681                 /* sufficient for exp_gen */
682                 if (get_nodes_block(pred) != block)
683                         continue;
684
685                 /* predecessor is in block,
686                    so it needs to be clean */
687                 if (get_irn_link(pred)) {
688                         continue;
689                 } else {
690                         DB((dbg, LEVEL_3, "unclean %+F because pred %+F unclean\n", n, pred));
691                         return 0;
692                 }
693         }
694         return 1;
695 }
696
697 /**
698  * Topological walker puts nodes in top-down topological order into exp_gen set.
699  * Assumed to walk blockwise and nodewise topologically top-down.
700  *
701  * @param irn    the node
702  * @param ctx    the environment
703  */
704 static void topo_walker(ir_node *irn, void *ctx)
705 {
706         ir_node    *block;
707         block_info *info;
708         ir_node    *value;
709         (void) ctx;
710
711         if (is_Block(irn))
712                 return;
713
714         /* GVN step: remember the value. The predecessors have already been visited. */
715         value = remember(irn);
716
717         block = get_nodes_block(irn);
718         info  = get_block_info(block);
719
720
721         /* no need to put constants into the sets: they are always redundant */
722         if (! is_nice_value(irn)) {
723                 /* filter unnecessary values from avail_out */
724                 ir_valueset_insert(info->avail_out, value, irn);
725
726                 if (is_irn_constlike(irn))
727                         return;
728         }
729
730         if (is_clean_in_block_expgen(irn, block)) {
731                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
732
733                 ir_valueset_insert(info->exp_gen, value, irn);
734                 /* flag irn as clean */
735                 set_irn_link(irn, irn);
736         } else {
737                 /* flag irn as not clean */
738                 set_irn_link(irn, NULL);
739         }
740 }
741
742 /* --------------------------------------------------------
743  * GVN-PRE Antic_in
744  * --------------------------------------------------------
745  */
746
747 /**
748  * Gets result of nodes phi translation into block.
749  *
750  * @param node   the node
751  * @param block  the target block
752  *
753  * @return a phi translation of node node into block block or NULL
754  */
755 static ir_node *get_translated(ir_node *block, ir_node *node)
756 {
757         if (is_irn_constlike(node))
758                 return node;
759
760         return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
761 }
762
763 static ir_node *get_translated_pred(ir_node *block, int pos, ir_node *node)
764 {
765         ir_node *pred_block;
766         if (is_irn_constlike(node))
767                 return node;
768
769         pred_block = get_Block_cfgpred_block(block, pos);
770         return ir_nodehashmap_get(ir_node, get_block_info(pred_block)->trans, node);
771 }
772
773 /**
774  * Saves the result of the phi translation of a node in the
775  * translation map of the target block.
776  * Constant-like nodes are never recorded.
777  *
778  * @param map    the translation map of the target block
779  * @param node   the node that was translated
780  * @param trans  the translation result
781  *
782  */
783 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
784 {
785         if (is_irn_constlike(node))
786                 return;
787         /* insert or replace */
788         ir_nodehashmap_insert(map, node, trans);
789 }
790
791 /**
792  * Checks if a node is hoistable above a block: every operand must either be
793  * defined in a block that strictly dominates block or be anticipated in block.
794  * A node whose representative is killed in block is not clean there,
795  * but it can still be hoisted into block.
796  *
797  * @param node        the node to be checked
798  * @param block       the block the node should be hoisted above
799  * @param translated  non-zero if node has already been phi translated
800  * @return            non-zero if node can be hoisted above block
801  */
802 static unsigned is_hoistable_above(ir_node *node, ir_node *block, unsigned translated)
803 {
804         int i;
805         int arity = get_irn_arity(node);
806
807         /* make sure that node can be hoisted above block */
808
809         if (is_irn_constlike(node))
810                 return 1;
811
812         for (i = 0; i < arity; ++i) {
813                 block_info *info       = get_block_info(block);
814                 ir_node    *pred       = get_irn_n(node, i);
815                 ir_node    *pred_value = identify(pred);
816                 ir_node    *pred_block = get_nodes_block(pred);
817
818                 /* predecessor strictly dominating */
819                 if (block_strictly_dominates(pred_block, block))
820                         continue;
821
822                 /* if we didn't translate the exact representative we cannot translate */
823                 if (translated && !get_translated(pred_block, pred))
824                         return 0;
825
826                 if (! ir_valueset_lookup(info->antic_in, pred_value))
827                         return 0;
828         }
829         return 1;
830 }
831
832 #if LOADS || DIVMODS
833 /* Helper function to compare the values of pred and avail_pred. */
834 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
835 {
836         ir_node *avail_value  = identify(avail_pred);
837         ir_node *trans_pred   = get_translated_pred(block, pos, pred);
838         ir_node *value;
839
840         if (trans_pred == NULL)
841                 trans_pred = pred;
842         value = identify(trans_pred);
843
844         DB((dbg, LEVEL_3, "manual compare %+F  %+F\n", pred, avail_pred));
845
846         return (value == avail_value);
847 }
848 #endif
849
850 #if LOADS
851 /**
852  * Does phi translation for redundant load nodes only.
853  * Returns NULL for non-redundant loads, which need to be phi translated.
854  * Loads are compared by comparing their pointer values
855  * and by ensuring that they are adjacent in the memory chain.
856  * This is equivalent to what phi_translation does,
857  * when a new node is created and then GCSE optimized resulting
858  * in an already available node.
859  */
860 static ir_node *phi_translate_load(ir_node *load, ir_node *block, int pos)
861 {
862         ir_node *mem   = get_Load_mem(load);
863         ir_node *trans = get_translated_pred(block, pos, mem);
864
865         if (trans == NULL)
866                 trans = mem;
867
868         /* no partial redundancy if this is a mode_M phi */
869         if (is_Proj(trans)) {
870                 /* The last memory operation in predecessor block */
871                 ir_node *avail_load = get_Proj_pred(trans);
872
873                 /* memop is a load with matching type */
874                 if (is_Load(avail_load) &&
875                             get_Load_mode(load) == get_Load_mode(avail_load)) {
876
877                         unsigned match = match_pred(get_Load_ptr(load), get_Load_ptr(avail_load), block, pos);
878
879                         if (match)
880                                 return avail_load;
881                 }
882         }
883         return NULL;
884 }
885 #endif
886
887 #if DIVMODS
888 /**
889  * Does phi translation for redundant Div/Mod nodes only.
890  * Returns NULL for non-redundant node, which needs to be phi translated.
891  */
892 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
893 {
894         ir_node *mem   = get_memop_mem(divmod);
895         ir_node *trans = get_translated_pred(block, pos, mem);
896
897         if (trans == NULL)
898                 trans = mem;
899
900         /* no partial redundancy if this is a mode_M phi */
901         if (is_Proj(trans)) {
902                 /* The last memory operation in predecessor block */
903                 ir_node *avail_op = get_Proj_pred(trans);
904
905                 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
906                         unsigned left, right;
907
908                         if (is_Div(avail_op)) {
909                                 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
910                                     get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
911
912                                         left  = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
913                                         right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
914
915                                         if (left && right)
916                                                 return avail_op;
917                                 }
918                         } else if (is_Mod(avail_op)) {
919                             if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
920
921                                         left  = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
922                                         right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
923
924                                         if (left && right)
925                                                 return avail_op;
926                                 }
927                         }
928                 }
929         }
930         return NULL;
931 }
932 #endif
933
934 /**
935  * Translates an expression above a Phi.
936  *
937  * @param node        the node
938  * @param block       the block in which the node is anticipated
939  * @param pos         the predecessor position of block to translate over
 * @param pred_block  the predecessor block the node is translated into
940  *
941  * @return a node representing the translated value
942  */
943 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_node *pred_block)
944 {
945         int       i;
946         int       arity;
947         ir_node **in;
948         ir_node  *target_block;
949         ir_node  *nn;
950         int       needed;
951
952         if (is_Phi(node)) {
953                 if (get_nodes_block(node) == block)
954                         return get_Phi_pred(node, pos);
955                 /* this phi does not need translation */
956                 return node;
957         }
958         arity = get_irn_arity(node);
959
960 #if LOADS
961         if (is_Load(node)) {
962                 ir_node *avail_load = phi_translate_load(node, block, pos);
963                 if (avail_load)
964                         return avail_load;
965         }
966 #endif
967
968 #if DIVMODS
969         if (is_Div(node) || is_Mod(node)) {
970                 ir_node *avail_op = phi_translate_divmod(node, block, pos);
971                 if (avail_op)
972                         return avail_op;
973         }
974 #endif
975
976         needed = 0;
977         /* insert phase enforces translation for previously not translated nodes */
978         if (environ->insert_phase)
979                 needed = 1;
980
981         in = XMALLOCN(ir_node *, arity);
982
983         /* A value has several representatives. The anti leader is chosen to be
984            the main representative. If we access a node as representative of a
985            value we always use the anti leader. The anti leader can be found by
986            antic_in(identify(node)). */
987         for (i = 0; i < arity; ++i) {
988                 /* get anti leader for pred to lookup its translated value */
989                 ir_node    *pred        = get_irn_n(node, i);
990                 ir_node    *anti_leader = get_anti_leader(pred, block);
991                 /* we cannot find this value in antic_in, because the value
992                    has (possibly) changed! */
993                 ir_node    *pred_trans  = get_translated(pred_block, anti_leader);
994                 ir_node    *expr;
995
996                 if (pred_trans == NULL) {
997                         expr = pred;
998                 } else {
999                         expr = pred_trans;
1000                         /* predecessor value changed, so translation is needed */
1001                         if (identify(expr) != identify(pred))
1002                                 needed |= 1;
1003                 }
1004
1005                 /* We need to build a value tree. But values cannot be translated,
1006                    expressions can be. So we translate the expressions and let GVN
1007                    identify their values. */
1008                 in[i] = expr;
1009         }
1010
1011         if (! needed)
1012                 return node;
1013
1014         DB((dbg, LEVEL_3, "Translate\n"));
1015
1016         target_block = get_Block_cfgpred_block(block, pos);
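        /* a Proj belongs in the block of its (translated) predecessor */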
1017         if (is_Proj(node))
1018                 target_block = get_nodes_block(in[0]);
1019
1020         /* copy node to represent the new value.
1021            We do not translate nodes that do not need translation,
1022            so we use the newly created nodes as value representatives only.
1023            Their block is not important, because we create new ones during
1024            insert node phase. */
1025         nn = new_ir_node(
1026                 get_irn_dbg_info(node),
1027                 environ->graph,
1028                 target_block,
1029                 get_irn_op(node),
1030                 get_irn_mode(node),
1031                 arity,
1032                 in);
1033         free(in);
1034         /* We need the attribute copy here, because the Hash value of a
1035            node might depend on it. */
1036         copy_node_attr(environ->graph, node, nn);
1037         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
1038            because it already uses availability. */
1039
1040         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
1041         return nn;
1042 }
1043
1044 /**
1045  * Block-walker, computes Antic_in(block).
1046  * Builds a value tree out of the graph by translating values
1047  * over phi nodes.
1048  *
1049  * @param block  the block
1050  * @param ctx    the walker environment
1051  */
1052 static void compute_antic(ir_node *block, void *ctx)
1053 {
1054         pre_env                *env       = (pre_env*)ctx;
1055         block_info             *succ_info;
1056         block_info             *info;
1057         ir_node                *succ;
1058         ir_node                *value;
1059         ir_node                *expr;
1060         size_t                  size;
1061         ir_valueset_iterator_t  iter;
1062         int                     n_succ;
1063
1064         /* the walker visits all nodes: process only blocks */
1065         if (! is_Block(block))
1066                 return;
1067
1068         /* the end block has no successor */
1069         if (block == env->end_block)
1070                 return;
1071
1072         info = get_block_info(block);
1073         /* track changes */
1074         size = ir_valueset_size(info->antic_in);
1075         n_succ = get_Block_n_cfg_outs(block);
1076
1077 #if 0
1078         if (env->first_iter) {
1079 #if NO_INF_LOOPS
1080                 if (! is_in_infinite_loop(block)) {
1081                         foreach_valueset(info->exp_gen, value, expr, iter) {
1082                                 ir_valueset_insert(info->antic_in, value, expr);
1083                         }
1084                 }
1085 #else
1086                 foreach_valueset(info->exp_gen, value, expr, iter) {
1087                         ir_valueset_insert(info->antic_in, value, expr);
1088                 }
1089 #endif
1090         }
1091 #endif
1092
1093         /* single successor which might contain phi nodes: translate over them */
1094         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1095                 succ      = get_Block_cfg_out(block, 0);
1096                 int pos   = get_Block_cfgpred_pos(succ, block);
1097                 succ_info = get_block_info(succ);
1098
1099                 /* initialize translated set */
1100                 if (env->first_iter) {
1101                         info->trans = XMALLOC(ir_nodehashmap_t);
1102                         ir_nodehashmap_init(info->trans);
1103                 }
1104
1105                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1106                         /* we translate the expression over the phi node,
1107                            remember() builds the value tree */
1108                         ir_node *trans       = phi_translate(expr, succ, pos, block);
1109                         /* create new value if necessary */
1110                         ir_node *trans_value = identify_or_remember(trans);
1111                         ir_node *represent;
1112
1113                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1114
1115                         /* on value change (phi present) we need the translated node
1116                            to represent the new value for possible further translation. */
1117                         if (value != trans_value)
1118                                 represent = trans;
1119                         else
1120                                 represent = expr;
1121
1122                         if (is_hoistable_above(expr, block, 1))
1123                                 ir_valueset_insert(info->antic_in, trans_value, represent);
1124                         set_translated(info->trans, expr, represent);
1125                 }
1126                 /* add exp_gen */
1127                 if (env->first_iter) {
1128                         foreach_valueset(info->exp_gen, value, expr, iter) {
1129                                 ir_valueset_insert(info->antic_in, value, expr);
1130                         }
1131                 }
1132
1133
1134         } else if (n_succ > 1) {
1135                 int         i;
1136                 ir_node    *common     = NULL;
1137                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1138                 block_info *succ0_info = get_block_info(succ0);
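                /* With several successors there are no phis to translate over (critical
                   edges are assumed to be split), so the candidates are simply the values
                   anticipated in every successor. */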
1139
1140                 /* intersection of the successors' antic_in sets */
1141                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1142                         /* iterate over remaining successors */
1143                         for (i = 1; i < n_succ; ++i) {
1144                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1145                                 block_info *succ_info = get_block_info(succ);
1146
1147                                 /* value in antic_in? */
1148                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1149                                 if (common == NULL)
1150                                         break;
1151                         }
1152
1153                         if (common && is_hoistable_above(expr, block, 0)) {
1154                                 ir_valueset_insert(info->antic_in, value, expr);
1155                                 DB((dbg, LEVEL_3, "common and clean %+F(%+F) in %+F\n", expr, value, block));
1156                         }
1157                 }
1158                 /* add exp_gen */
1159                 if (env->first_iter) {
1160                         foreach_valueset(info->exp_gen, value, expr, iter) {
1161                                 ir_valueset_insert(info->antic_in, value, expr);
1162                         }
1163                 }
1164         }
1165
1166
1167
1168         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1169
1170         if (size != ir_valueset_size(info->antic_in))
1171                 env->changes |= 1;
1172 }
1173
1174 /* --------------------------------------------------------
1175  * Main algorithm Avail_out
1176  * --------------------------------------------------------
1177  */
1178
1179 /**
1180  * Computes Avail_out(block):
1181  *
1182  * Avail_in(block)  = Avail_out(dom(block))
1183  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1184  *
1185  * Precondition:
1186  *  This function must be called in the top-down topological order:
1187  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1188  *
1189  * @param block   the block
1190  * @param ctx     walker context
1191  */
1192 static void compute_avail_top_down(ir_node *block, void *ctx)
1193 {
1194         pre_env    *env   = (pre_env*)ctx;
1195         block_info *info;
1196
1197         if (block == env->end_block)
1198                 return;
1199
1200         info  = get_block_info(block);
1201
1202         /* Add all nodes from the immediate dominator.
1203            This ensures that avail_out contains the leader. */
1204         if (block != env->start_block) {
1205                 ir_node                *dom_block = get_Block_idom(block);
1206                 block_info             *dom_info  = get_block_info(dom_block);
1207                 ir_node                *value;
1208                 ir_node                *expr;
1209                 ir_valueset_iterator_t  iter;
1210
1211                 foreach_valueset(dom_info->avail_out, value, expr, iter) {
1212                         /* use first available expr as leader */
1213                         ir_valueset_replace(info->avail_out, value, expr);
1214                 }
1215         }
1216
1217         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1218 }
1219
1220 /* --------------------------------------------------------
1221  * Main algorithm redundancy detection
1222  * --------------------------------------------------------
1223  */
1224
1225 /**
1226  * Flags node irn redundant depending on redundant parameter.
1227  */
1228 static void flag_redundant(ir_node *irn, unsigned redundant)
1229 {
1230         if (redundant) {
1231                 set_irn_link(irn, irn);
1232         } else {
1233                 set_irn_link(irn, NULL);
1234         }
1235 }
1236
1237 /*
1238  * Returns redundant flag of node irn.
1239  */
1240 static unsigned is_redundant(ir_node *irn)
1241 {
1242         return (get_irn_link(irn) != NULL);
1243 }
1244
1245 /**
1246  * Returns a valid mode if the value of expr is a partially redundant value.
1247  *
1248  * @param block   the block
1249  * @param expr    the expression
 * @param value   the value of the expression
1250  *
1251  * @return mode of the expression if it is partially redundant else NULL
1252  */
1253 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1254 {
1255         ir_node *first_avail         = NULL;
1256         int      pos;
1257         int      arity               = get_irn_arity(block);
1258         int      fully_redundant     = 1;
1259         int      partially_redundant = 0;
1260         ir_mode *mode                = NULL;
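        /* fully_redundant stays set only if the same available expression is
           found along every predecessor */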
1261
1262         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1263
1264         /* for each predecessor block */
1265         for (pos = 0; pos < arity; ++pos) {
1266                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1267                 block_info *pred_info;
1268                 ir_node    *trans_expr;
1269                 ir_node    *trans_value;
1270                 ir_node    *avail_expr;
1271
1272                 /* ignore bad blocks. */
1273                 if (is_Bad(pred_block))
1274                         continue;
1275
1276                 pred_info  = get_block_info(pred_block);
1277                 trans_expr = get_translated(pred_block, expr);
1278                 trans_value = identify(trans_expr);
1279
1280                 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1281                 /* value might be available through a not yet existing constant */
1282                 if (avail_expr == NULL && is_Const(trans_expr)) {
1283                         /* limit range of new constants */
1284                         ir_mode   *cmode = get_irn_mode(trans_expr);
1285                         ir_tarval *upper = new_tarval_from_long(127, cmode);
1286                         ir_tarval *lower = new_tarval_from_long(-127, cmode);
1287                         ir_tarval *c     = get_Const_tarval(trans_expr);
1288
1289                         if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
1290                                 tarval_cmp(c, upper) != ir_relation_greater_equal) {
1291                                 avail_expr = NULL;
1292                         } else {
1293                                 avail_expr = trans_expr;
1294                         }
1295                 }
1296
1297                 DB((dbg, LEVEL_3, "avail_expr %+F\n", avail_expr));
1298
1299                 if (avail_expr == NULL) {
1300                         pred_info->avail = trans_expr;
1301                         pred_info->found = 0;
1302                         fully_redundant  = 0;
1303                 } else {
1304                         /* expr is available */
1305                         pred_info->avail    = avail_expr;
1306                         pred_info->found    = 1;
1307                         mode                = get_irn_mode(avail_expr);
1308                         partially_redundant = 1;
1309
1310                         if (first_avail == NULL)
1311                                 first_avail = avail_expr;
1312                         else if (first_avail != avail_expr)
1313                                 /* Multiple different expressions are available */
1314                                 fully_redundant = 0;
1315
1316                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1317                 }
1318         }
1319
1320 #if BETTER_GREED
1321         /* value is redundant from last iteration,
1322            but has not been removed from antic_in (is not optimized) */
1323         if (! environ->first_iter && is_redundant(expr))
1324                 return mode;
1325 #endif
1326
1327         /* If it is not the same value already existing along every predecessor
1328            and it is defined by some predecessor then it is partially redundant. */
1329         if (! partially_redundant || fully_redundant)
1330                 return NULL;
1331         return mode;
1332 }
1333
1334 /**
1335  * Updates the new_set of a block by adding the new_set of
1336  * the immediate dominating block.
1337  *
1338  * @param block  the block
 * @param idom   the immediate dominator of the block
1339  */
1340 static void update_new_set(ir_node *block, ir_node *idom)
1341 {
1342         ir_node                *value;
1343         ir_node                *expr;
1344         ir_valueset_iterator_t  iter;
1345         block_info             *curr_info = get_block_info(block);
1346         block_info             *idom_info = get_block_info(idom);
1347         int                     updated   = 0;
1348
1349         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1350         foreach_valueset(idom_info->new_set, value, expr, iter) {
1351                 /* inherit new_set from immediate dominator */
1352                 ir_valueset_insert(curr_info->new_set, value, expr);
1353                 /* replace in avail_out */
1354                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1355         }
1356 #ifdef DEBUG_libfirm
1357         if (updated)
1358                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1359 #endif
1360 } /* update_new_set */
1361
1362 /**
1363  * Checks if hoisting irn is greedy.
1364  * Greedy hoisting means that nodes which are not partially redundant
1365  * get hoisted. This happens if a partially redundant node has
1366  * non-redundant predecessors.
1367  */
1368 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1369 {
1370         int arity = get_irn_arity(irn);
1371         int i;
1372
1373         for (i = 0; i < arity; ++i) {
1374                 ir_node *pred        = get_irn_n(irn, i);
1375                 ir_node *anti_leader = get_anti_leader(pred, block);
1376
1377                 /* predecessor is on the current path, so we have to ensure redundancy */
1378                 if (block_dominates(block, get_nodes_block(pred)) && ! is_redundant(anti_leader))
1379                         return 1;
1380         }
1381         return 0;
1382 }
1383
1384 /**
1385  * Perform insertion of partially redundant values.
1386  * For every block node, do the following:
1387  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1388  * If the block has multiple predecessors,
1389  *     2a. Iterate over the ANTIC expressions for the block to see if
1390  *         any of them are partially redundant.
1391  *     2b. If so, insert them into the necessary predecessors to make
1392  *         the expression fully redundant.
1393  *     2c. Insert a new Phi merging the values of the predecessors.
1394  *     2d. Insert the new Phi, and the new expressions, into the
1395  *         NEW_SETS set.
1396  *
1397  * @param block  the block
1398  * @param ctx    the walker environment
1399  */
1400 static void insert_nodes_walker(ir_node *block, void *ctx)
1401 {
1402         pre_env                *env    = (pre_env*)ctx;
1403         int                     arity  = get_irn_arity(block);
1404         ir_node                *value;
1405         ir_node                *expr;
1406         block_info             *info;
1407         ir_node                *idom;
1408         int                     pos;
1409         ir_valueset_iterator_t  iter;
1410 #if BETTER_GREED
1411         plist_t *stack;
1412 #endif
1413
1414         /* only blocks */
1415         if (! is_Block(block))
1416                 return;
1417
1418         /* ensure that even the start block has a new_set */
1419         info = get_block_info(block);
1420         if (info->new_set)
1421                 ir_valueset_del(info->new_set);
1422         info->new_set = ir_valueset_new(16);
1423
1424         if (block == env->start_block)
1425                 return;
1426
1427         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1428
1429         idom = get_Block_idom(block);
1430         update_new_set(block, idom);
1431
1432         /* process only path joining blocks */
1433         if (arity < 2) {
1434                 return;
1435         }
1436
1437 #if BETTER_GREED
1438         stack = plist_new();
1439         foreach_valueset(info->antic_in, value, expr, iter) {
1440                 /* inverse topological order */
1441                 plist_insert_front(stack, expr);
1442         }
1443 #endif
1444
1445         /* This is the main reason antic_in is preferred over antic_out;
1446            we may iterate over every anticipated value first and not
1447            over the predecessor blocks. */
1448         foreach_valueset(info->antic_in, value, expr, iter) {
1449                 ir_mode  *mode;
1450                 ir_node  *phi;
1451                 ir_node **phi_in;
1452
1453                 /* already done? */
1454                 if (ir_valueset_lookup(info->antic_done, value))
1455                         continue;
1456
1457                 /* filter phi nodes from antic_in */
1458                 if (is_Phi(expr)) {
1459                         flag_redundant(expr, 1);
1460                         continue;
1461                 }
1462
1463                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1464
1465                 /* A value computed in the dominator is totally redundant.
1466                    Hence we have nothing to insert. */
1467                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1468                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1469                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1470
1471                         flag_redundant(expr, 1);
1472                         continue;
1473                 }
1474
1475 #if !BETTER_GREED
1476                 if (is_hoisting_greedy(expr, block)) {
1477                         DB((dbg, LEVEL_2, "Greedy\n"));
1478                         flag_redundant(expr, 0);
1479                         continue;
1480                 }
1481 #endif
1482
1483                 mode = is_partially_redundant(block, expr, value);
1484                 if (mode == NULL) {
1485                         flag_redundant(expr, 0);
1486                         continue;
1487                 } else {
1488                         flag_redundant(expr, 1);
1489                 }
1490
1491 #if BETTER_GREED
1492                 if (is_hoisting_greedy(expr, block)) {
1493                         DB((dbg, LEVEL_2, "Better greed: greedy\n"));
1494                         continue;
1495                 }
1496 #endif
1497
1498 #if LOADS || DIVMODS
1499                 /* save old mode_M phis to remove keepalive edges later */
1500                 if (is_memop(expr)) {
1501                         ir_node *mem = get_memop_mem(expr);
1502                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1503                                 ir_nodeset_insert(env->keeps, mem);
1504                         }
1505                 }
1506 #endif
1507
1508 #ifdef DEBUG_libfirm
1509                 if (! is_Proj(expr)) {
1510                         if (env->first_iter)
1511                                 inc_stats(gvnpre_stats->first_iter_found);
1512                         inc_stats(gvnpre_stats->partially);
1513                 }
1514                 if (is_Load(expr))
1515                         inc_stats(gvnpre_stats->loads);
1516                 else if (is_Div(expr) || is_Mod(expr))
1517                         inc_stats(gvnpre_stats->divmods);
1518 #endif
1519
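                     /* Build one Phi operand per predecessor: reuse the node that is
                        already available there, or fall back to the phi-translated copy
                        of expr and record it as available in that predecessor. */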
1520                 phi_in = XMALLOCN(ir_node *, arity);
1521
1522                 /* for each predecessor block */
1523                 for (pos = 0; pos < arity; ++pos) {
1524                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1525                         block_info *pred_info;
1526
1527                         /* ignore bad blocks. */
1528                         if (is_Bad(pred_block)) {
1529                                 ir_graph *irg = get_irn_irg(pred_block);
1530                                 phi_in[pos] = new_r_Bad(irg, mode);
1531                                 continue;
1532                         }
1533                         pred_info = get_block_info(pred_block);
1534
1535                         if (! pred_info->found) {
1536                                 ir_node *trans = get_translated(pred_block, expr);
1537
1538                                 assert(trans);
1539                                 if (trans == expr) {
1540                                                 /* expr has only been phi-translated before if an ancestor was a Phi;
1541                                                    other expressions can be redundant as well but still map to
1542                                                    themselves, so translate them here. */
1543                                         trans = phi_translate(expr, block, pos, pred_block);
1544                                         set_translated(pred_info->trans, expr, trans);
1545                                 }
1546
1547                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F not available\n", trans, pred_block, expr));
1548
1549                                 /* the value is now available in the predecessor block through trans;
1550                                    insert (not replace) because it was not available before */
1551                                 ir_valueset_insert(pred_info->avail_out, value, trans);
1552                                 phi_in[pos] = trans;
1553                         } else {
1554                                 /* value available */
1555                                 phi_in[pos] = pred_info->avail;
1556                         }
1557                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1558                 }
1559
1560                 /* We do not connect tuples as they will be connected automatically
1561                    by the corresponding projections. */
1562                 if (get_irn_mode(expr) != mode_T) {
1563
1564                         phi = new_r_Phi(block, arity, phi_in, mode);
1565                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1566
1567                         /* this value is now available through the new phi */
1568                         ir_valueset_replace(info->avail_out, value, phi);
1569                         ir_valueset_insert(info->new_set, value, phi);
1570                 }
1571                 free(phi_in);
1572
1573 #if 0
1574                 /* remove from antic_in, because expr is not anticipated
1575                    anymore in this block */
1576                 ir_valueset_remove_iterator(info->antic_in, &iter);
1577 #endif
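                     /* Remember that this value has been handled: antic_done is checked
                        at the top of this loop and reused by the (optional) hoist-high
                        phase. */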
1578                 ir_valueset_insert(info->antic_done, value, expr);
1579
1580                 env->changes |= 1;
1581         }
1582
1583 #if BETTER_GREED
1584         if (env->changes) {
1585                 plist_element_t *it;
1586                 /* iterate in inverse topological order */
1587                 foreach_plist(stack, it) {
1588                         ir_node *irn   = (ir_node *)plist_element_get_value(it);
1589                         ir_node *block = get_nodes_block(irn);
1590                         int      j;
1591                         char     redundant = 1;
1592
1593                         /* does irn only have redundant successors? */
1594
1595                         if (! get_irn_outs_computed(irn))
1596                                 continue;
1597
1598                         for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
1599                                 ir_node *succ = get_irn_out(irn, j);
1600
1601                                 /* if succ and irn are in the same block */
1602                                 if (get_nodes_block(succ) == block && is_redundant(succ)) {
1603                                         continue;
1604                                 } else {
1605                                         redundant = 0;
1606                                         break;
1607                                 }
1608                         }
1609
1610                         if (redundant)
1611                                 flag_redundant(irn, 1);
1612                 }
1613         }
1614         plist_free(stack);
1615 #endif
1616
1617 }
1618
1619 #if HOIST_HIGH
1620 /**
1621  * Dom tree block walker that moves the nodes made available by the
1622  * insert phase into the highest possible block in which they are
1623  * still anticipated.
1624  */
1625 static void hoist_high(ir_node *block, void *ctx)
1626 {
1627         pre_env                *env        = (pre_env*)ctx;
1628         block_info             *curr_info;
1629         ir_valueset_iterator_t  iter;
1630         ir_node                *expr;
1631         ir_node                *value;
1632         int                     arity      = get_irn_arity(block);
1633
1634         if (! is_Block(block))
1635                 return;
1636
1637         if (arity < 2)
1638                 return;
1639
1640         if (block == env->start_block)
1641                 return;
1642
1643         curr_info = get_block_info(block);
1644
1645         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1646
1647         /* foreach entry optimized by insert node phase */
1648         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1649                 int pos;
1650
1651                 if (is_memop(expr) || is_Proj(expr))
1652                         continue;
1653
1654                 /* visit hoisted expressions */
1655                 for (pos = 0; pos < arity; ++pos) {
1656                         /* standard target is predecessor block */
1657                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1658                         block_info *pred_info  = get_block_info(target);
1659                         ir_node    *avail;
1660                         ir_node    *temp_target;
1661                         ir_node    *new_target;
1662                         ir_node    *trans_expr;
1663                         ir_node    *trans_value;
1664                         ir_node    *dom;
1665                         int         avail_arity;
1666                         int         i;
1667                         unsigned    nest_depth;
1668
1669                         /* get phi translated value */
1670                         trans_expr  = get_translated(target, expr);
1671                         trans_value = identify(trans_expr);
1672                         avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1673
1674                         /* get the used expr on this path */
1675                         if (avail == NULL)
1676                                 avail = trans_expr;
1677
1678                         value = identify(avail);
1679
1680                         /* anticipation border */
1681                         new_target  = NULL;
1682                         temp_target = NULL;
1683                         nest_depth  = get_loop_depth(get_irn_loop(target));
1684                         dom         = target;
1685
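                             /* Walk up the dominator tree as long as the value stays anticipated;
                                the highest block reached that does not lie in a deeper loop nest
                                becomes the candidate hoisting target. */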
1686                         while (dom != env->start_block) {
1687                                 block_info *dom_info;
1688
1689                                 dom = get_Block_idom(dom);
1690                                 dom_info = get_block_info(dom);
1691
1692                                 if (is_Bad(dom))
1693                                         break;
1694
1695                                 /* check whether the available node is still anticipated and clean
1696                                    (clean is part of antic); if it is not, the value must not be
1697                                    hoisted above this block */
1698                                 if (! ir_valueset_lookup(dom_info->antic_in, value))
1699                                         break;
1700
1701                                 /* do not hoist into deeper loop nests than the last accepted target */
1702                                 if (get_loop_depth(get_irn_loop(dom)) <= nest_depth) {
1703                                         nest_depth = get_loop_depth(get_irn_loop(dom));
1704                                         new_target = dom;
1705                                 }
1706
1707                                 temp_target = dom;
1708                         }
1709
1710                         /* No new target or does the available node already dominate the new_target? */
1711                         if (new_target) {
1712                                 DB((dbg, LEVEL_2, "leader block %+F\n", get_nodes_block(avail)));
1713                                 /* already in the same block or in a dominating one? */
1714                                 if (block_dominates(get_nodes_block(avail), new_target)) {
1715                                         DB((dbg, LEVEL_4, "fail: antic border %+F\n", block));
1716                                         new_target = NULL;
1717                                 }
1718                         }
1719
1720                         DB((dbg, LEVEL_2, "antic border %+F\n", new_target));
1721
1722                         avail_arity = get_irn_arity(avail);
1723                         /* check for other uses of the available node's operands on the current path */
1724                         for (i = 0; i < avail_arity; i++) {
1725                                 ir_node *pred       = get_irn_n(avail, i);
1726                                 int      j;
1727
1728                                 if (new_target == NULL)
1729                                         break;
1730
1731                                 /* TODO this might be a problem as we mainly operate on new nodes */
1732                                 if (! get_irn_outs_computed(pred)) {
1733                                         new_target = NULL;
1734                                         break;
1735                                 }
1736
1737 #if 0
1738                                 if (! block_dominates(pred_block, new_target)) {
1739                                         new_target = pred_block;
1740                                 }
1741 #endif
1742                                 /* check for every successor */
1743                                 for (j = get_irn_n_outs(pred) - 1; j >= 0; --j) {
1744                                         ir_node *succ = get_irn_out(pred, j);
1745
1746                                         /* is succ on this path? */
1747                                         if (block_dominates(new_target, get_nodes_block(succ))) {
1748                                                 ir_node *succ_value = identify(succ);
1749
1750                                                 /* Do we have another user than avail?
1751                                                    Then predecessor is not dead after removal of avail. */
1752                                                 if (succ_value != value) {
1753                                                         DB((dbg, LEVEL_4, "fail: still used %+F\n", succ));
1754                                                         new_target = NULL;
1755                                                         break;
1756                                                 }
1757                                         }
1758                                 }
1759                         }
1760
1761                         /* only one usage on our path */
1762                         if (new_target) {
1763                                 /* push the available node up into the new target block */
1764                                 set_nodes_block(avail, new_target);
1765
1766                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1767                         }
1768                 }
1769         }
1770 }
1771 #endif
1772
1773 /* --------------------------------------------------------
1774  * Elimination of fully redundant nodes
1775  * --------------------------------------------------------
1776  */
1777
1778 /**
1779  * Walker which finds redundant nodes using avail_out sets
1780  * and exchanges them for existing ones.
1781  * We cannot change the graph here as this would affect
1782  * the hash values of the nodes.
1783  *
1784  * @param irn  the node
1785  * @param ctx  the walker environment
1786  */
1787 static void eliminate(ir_node *irn, void *ctx)
1788 {
1789         pre_env *env = (pre_env*)ctx;
1790
1791         if (! is_Block(irn)) {
1792                 ir_node    *block = get_nodes_block(irn);
1793                 block_info *info  = get_block_info(block);
1794                 ir_node    *value = identify(irn);
1795
1796                 if (value != NULL) {
1797                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1798
1799                         if (expr != NULL && expr != irn) {
1800                                 elim_pair *p = OALLOC(env->obst, elim_pair);
1801
1802                                 p->old_node = irn;
1803                                 p->new_node = expr;
1804                                 p->next     = env->pairs;
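                                     /* Expressions with an index >= last_idx were created by the
                                        insert phase, so replacing irn by one of them removes a
                                        partial redundancy; otherwise the redundancy is a full one. */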
1805                                 if (get_irn_idx(expr) >= env->last_idx)
1806                                         p->reason = FS_OPT_GVN_PARTLY;
1807                                 else
1808                                         p->reason = FS_OPT_GVN_FULLY;
1809                                 env->pairs = p;
1810                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1811                         }
1812                 }
1813         }
1814 }  /* eliminate */
1815
1816 /**
1817  * Do all the recorded changes and optimize
1818  * newly created Phi's.
1819  *
1820  * @param pairs  list of elimination pairs
1821  */
1822 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1823 {
1824         elim_pair             *p;
1825         ir_nodeset_iterator_t  iter;
1826         ir_node               *m_phi;
1827         ir_node               *end    = environ->end_node;
1828
1829         for (p = pairs; p != NULL; p = p->next) {
1830                 /* might be already changed */
1831                 p->new_node = skip_Id(p->new_node);
1832
1833                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1834
1835                 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1836                  * which we can optimize here */
1837                 if (is_Phi(p->new_node)) {
1838                         int      i;
1839                         ir_node *res = NULL;
1840
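                             /* res collects the single predecessor value that differs from
                                old_node; as soon as a second such predecessor appears the
                                Phi is left untouched. */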
1841                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1842                                 ir_node *pred = get_irn_n(p->new_node, i);
1843
1844                                 if (pred != p->old_node) {
1845                                         if (res) {
1846                                                 res = NULL;
1847                                                 break;
1848                                         }
1849                                         res = pred;
1850                                 }
1851                         }
1852                         if (res) {
1853                                 exchange(p->new_node, res);
1854                                 p->new_node = res;
1855                         }
1856                 }
1857                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1858
1859                 exchange(p->old_node, p->new_node);
1860         }
1861
1862         /* remove keep alive edges of unused mode_M phis */
1863         foreach_ir_nodeset(keeps, m_phi, iter) {
1864                 remove_End_keepalive(end, m_phi);
1865         }
1866 }  /* eliminate_nodes */
1867
1868 /* --------------------------------------------------------
1869  * GVN PRE pass
1870  * --------------------------------------------------------
1871  */
1872
1873 /**
1874  * The GVN-PRE algorithm.
1875  *
1876  * @param irg   the graph
1877  */
1878 static void gvn_pre(ir_graph *irg, pre_env *env)
1879 {
1880         unsigned              antic_iter;
1881         unsigned              insert_iter;
1882
1883         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1884
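             /* Overall structure: allocate per-block info, build the exp_gen and
                avail_out sets, iterate the antic_in sets to a fixed point, insert
                Phis for partially redundant values (optionally hoisting them
                higher), and finally eliminate the now redundant nodes. */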
1885         /* allocate block info */
1886         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1887
1888         ir_nodehashmap_init(&value_map);
1889
1890         /* generate exp_gen */
1891         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1892         dump_all_expgen_sets(env->list);
1893
1894         /* compute the avail_out sets for all blocks */
1895         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1896
1897         /* compute the anticipated value sets for all blocks */
1898         antic_iter      = 0;
1899         env->first_iter = 1;
1900
1901         /* antic_in passes */
1902         do {
1903                 ++antic_iter;
1904                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1905                 env->changes = 0;
1906                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1907                 env->first_iter = 0;
1908                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1909         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1910
1911         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1912
1913         ir_nodeset_init(env->keeps);
1914         insert_iter       = 0;
1915         env->insert_phase = 1;
1916         env->first_iter   = 1;
1917         env->last_idx     = get_irg_last_idx(irg);
1918         /* compute redundant expressions */
1919         do {
1920                 ++insert_iter;
1921                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1922                 env->changes = 0;
1923                 /* TODO topologically top down would be better; fewer iterations. */
1924                 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
1925                 env->first_iter = 0;
1926                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1927         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1928         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1929
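             /* Optionally move the values made available by the insert phase into
                higher blocks (compiled out by default, see HOIST_HIGH). */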
1930 #if HOIST_HIGH
1931         dom_tree_walk_irg(irg, hoist_high, NULL, env);
1932 #endif
1933
1934         /* last step: eliminate nodes */
1935         irg_walk_graph(irg, NULL, eliminate, env);
1936         eliminate_nodes(env->pairs, env->keeps);
1937
1938         ir_nodeset_destroy(env->keeps);
1939 }
1940
1941 /**
1942  * GVN-PRE pass for graph irg.
1943  *
1944  * @param irg   the graph
1945  */
1946 void do_gvn_pre(ir_graph *irg)
1947 {
1948         struct obstack        obst;
1949         pre_env               env;
1950         ir_nodeset_t          keeps;
1951         optimization_state_t  state;
1952         block_info           *block_info;
1953
1954         /* Bads and unreachable code cause too much trouble with
1955            dominance, so both are removed up front.
1956            Consistent loop info is needed for endless loop detection.
1957            The absence of critical edges is a GVN-PRE precondition.
1958          */
1959         assure_irg_properties(irg,
1960                 IR_GRAPH_PROPERTY_NO_BADS
1961                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
1962                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
1963                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1964                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1965                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
1966
1967         /* register a debug mask */
1968         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1969
1970         save_optimization_state(&state);
1971         environ = &env;
1972
1973         /* edges will crash if enabled, due to our trick of allocating on another obstack */
1974         edges_deactivate(irg);
1975         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1976
1977         DEBUG_ONLY(init_stats();)
1978
1979         /* setup environment */
1980         obstack_init(&obst);
1981         env.graph        = irg;
1982         env.obst         = &obst;
1983         env.list         = NULL;
1984         env.start_block  = get_irg_start_block(irg);
1985         env.end_block    = get_irg_end_block(irg);
1986         env.end_node     = get_irg_end(irg);
1987         env.pairs        = NULL;
1988         env.keeps        = &keeps;
1989         env.insert_phase = 0;
1990
1991         /* Detect infinite loops and set their loop links to non-zero. */
1992         analyse_loops(irg);
1993
1994         /* Switch on GCSE. We need it to correctly compute
1995            the value of a node, which is independent of
1996            its block. */
1997         set_opt_global_cse(1);
1998         /* replace the value table normally built by new_identities with one that uses the GVN compare function */
1999         if (irg->value_table != NULL)
2000                 del_pset(irg->value_table);
2001         /* start with a pset sized for 512 nodes */
2002         irg->value_table = new_pset(compare_gvn_identities, 512);
2003
2004         /* do GVN-PRE pass */
2005         gvn_pre(irg, &env);
2006         DEBUG_ONLY(print_stats();)
2007
2008         /* clean up: delete all sets */
2009         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2010                 free_block_info(block_info);
2011         }
2012
2013         DEBUG_ONLY(free_stats();)
2014         ir_nodehashmap_destroy(&value_map);
2015         obstack_free(&obst, NULL);
2016         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2017
2018         /* Pin the graph again.
2019            This is needed due to the use of set_opt_global_cse(1) */
2020         set_irg_pinned(irg, op_pin_state_pinned);
2021         restore_optimization_state(&state);
2022         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
2023
2024         /* TODO there are optimizations that try to use the existing value_table */
2025         new_identities(irg);
2026 }
2027
2028 /* Creates an ir_graph pass for do_gvn_pre. */
2029 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2030 {
2031         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
2032 }