Different management of antic sets.
[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Global Value Numbering Partial Redundancy Elimination
23  *          (VanDrunen Hosking 2004)
24  * @author  Michael Beck
25  *
26  */
27 #include "config.h"
28
29 #include "debug.h"
30 #include "ircons.h"
31 #include "irdom.h"
32 #include "iredges.h"
33 #include "irflag.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
41 #include "irouts.h"
42 #include "irpass.h"
43 #include "valueset.h"
44 #include "irloop.h"
45
46 #include "irgraph_t.h"
47 #include "irnode_t.h"
48 #include "iropt_t.h"
49 #include "plist.h"
50
51 #define MAX_ANTIC_ITER 10
52 #define MAX_INSERT_ITER 3
53
54 #define HOIST_HIGH 1
55 #define BETTER_GREED 0
56 #define LOADS 0
57 #define DIVMODS 0
58 #define NO_INF_LOOPS 0
59 #define NO_INF_LOOPS2 0
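/*
 * Informal overview of the switches above (some of them are only consumed
 * further down in this file):
 *  - MAX_ANTIC_ITER / MAX_INSERT_ITER presumably bound the number of fixed
 *    point iterations of the antic_in computation and of the insert phase
 *    (compare the antic_iterations / insert_iterations statistics below).
 *  - HOIST_HIGH enables the additional "hoist high" placement that is
 *    counted in the statistics.
 *  - BETTER_GREED changes where the is_hoisting_greedy() check is applied
 *    in insert_nodes().
 *  - LOADS / DIVMODS enable partial redundancy elimination for Load
 *    respectively Div/Mod nodes (see phi_translate_load() and
 *    phi_translate_divmod()).
 *  - NO_INF_LOOPS / NO_INF_LOOPS2 enable the infinite loop handling built
 *    on is_in_infinite_loop().
 */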
60
61 /** Additional info we need for every block. */
62 typedef struct block_info {
63         ir_valueset_t     *exp_gen;    /* contains this block's clean expressions */
64         ir_valueset_t     *avail_out;  /* available values at block end */
65         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
66         ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert_nodes() */
67         ir_valueset_t     *new_set;    /* values newly made available by hoisting */
68         ir_nodehashmap_t  *trans;      /* nodes translated into this block */
69         ir_node           *avail;      /* saves available node for insert_nodes */
70         int                found;      /* saves kind of availability for insert_nodes */
71         ir_node           *block;      /* block of the block_info */
72         struct block_info *next;       /* links all instances for easy access */
73 } block_info;
74
75 /**
76  * A pair of nodes to be exchanged.
77  * We have to defer the exchange because our hash-sets cannot
78  * find an already replaced node.
79  */
80 typedef struct elim_pair {
81         ir_node *old_node;      /* node that will be replaced */
82         ir_node *new_node;      /* replacement for old_node */
83         struct elim_pair *next; /* links all instances for easy access */
84         int     reason;         /* reason for the replacement */
85 } elim_pair;
86
87 /** environment for the GVN-PRE algorithm */
88 typedef struct pre_env {
89         ir_graph       *graph;        /* current graph */
90         struct obstack *obst;         /* obstack to allocate on */
91         ir_node        *start_block;  /* start block of the current graph */
92         ir_node        *end_block;    /* end block of the current graph */
93         ir_node        *end_node;     /* end node of the current graph */
94         block_info     *list;         /* block_info list head */
95         elim_pair      *pairs;        /* elim_pair list head */
96         ir_nodeset_t   *keeps;        /* a list of to be removed phis to kill their keep alive edges */
97         unsigned        last_idx;     /* last node index of input graph */
98         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
99         char            first_iter;   /* non-zero for first fixed point iteration */
100         char            insert_phase; /* non-zero during the insert phase */
101 } pre_env;
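/*
 * Rough phase structure of the pass, as far as it is visible in this file:
 *  1. remember()/identify() build the GVN value map while topo_walker()
 *     fills exp_gen and avail_out of every block.
 *  2. compute_avail_top_down() completes avail_out using the dominator tree.
 *  3. compute_antic() iterates antic_in to a fixed point, phi-translating
 *     values over block boundaries.
 *  4. insert_nodes() makes partially redundant values fully redundant by
 *     inserting expressions and phis; the actual exchanges are queued as
 *     elim_pairs and performed afterwards, because the hash sets cannot
 *     find already replaced nodes.
 */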
102
103 static pre_env *environ;
104 /* custom GVN value map */
105 static ir_nodehashmap_t value_map;
106
107 /* debug module handle */
108 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
109
110 #ifdef DEBUG_libfirm
111
112 /* --------------------------------------------------------
113  * Statistics
114  * --------------------------------------------------------
115  */
116
117 typedef struct gvnpre_statistics {
118         int replaced;
119         int partially;
120         int fully;
121         int loads;
122         int divmods;
123         int hoist_high;
124         int first_iter_found;
125         int antic_iterations;
126         int insert_iterations;
127         int infinite_loops;
128 } gvnpre_statistics;
129
130 gvnpre_statistics *gvnpre_stats = NULL;
131
132 static void init_stats(void)
133 {
134         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
135 }
136
137 static void free_stats(void)
138 {
139         free(gvnpre_stats);
140         gvnpre_stats = NULL;
141 }
142
143 static void print_stats(void)
144 {
145         gvnpre_statistics *stats = gvnpre_stats;
146         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
147         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
148         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
149         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
150         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
151         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
152         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
153         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
154         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
155         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
156 }
157
158 #define set_stats(var, value) (var)=(value)
159 #define inc_stats(var)        ((var)+=1)
160
161 /* --------------------------------------------------------
162  * Dump valuesets
163  * --------------------------------------------------------
164  */
165
166 /**
167  * Dump a value set.
168  *
169  * @param set    the set to dump
170  * @param txt    a text to describe the set
171  * @param block  the owner block of the set
172  */
173 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
174 {
175         ir_valueset_iterator_t iter;
176         ir_node *value, *expr;
177         int i;
178
179         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
180         i = 0;
181         foreach_valueset(set, value, expr, iter) {
182                 if ((i & 3) == 3)
183                         DB((dbg, LEVEL_2, "\n"));
184                 if (value != expr)
185                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
186                 else
187                         DB((dbg, LEVEL_2, " %+F,", expr));
188                 ++i;
189         }
190         DB((dbg, LEVEL_2, "\n}\n"));
191 }  /* dump_value_set */
192
193 /**
194  * Dump all exp_gen value sets.
195  *
196  * @param list  the list of block infos to retrieve the sets from
197  */
198 static void dump_all_expgen_sets(block_info *list)
199 {
200         block_info *block_info;
201
202         for (block_info = list; block_info != NULL; block_info = block_info->next) {
203                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
204         }
205 }
206
207 #else
208
209 #define dump_all_expgen_sets(list)
210 #define dump_value_set(set, txt, block)
211
212 #endif /* DEBUG_libfirm */
213
214 /* --------------------------------------------------------
215  * GVN Functions
216  * --------------------------------------------------------
217  */
218
219 /**
220  * Compares node collisions in valuetable.
221  * Modified identities_cmp().
222  */
223 static int compare_gvn_identities(const void *elt, const void *key)
224 {
225         ir_node *a = (ir_node *)elt;
226         ir_node *b = (ir_node *)key;
227         int i, irn_arity_a;
228
229         if (a == b) return 0;
230
231         /* phi nodes kill predecessor values and are always different */
232         if (is_Phi(a) || is_Phi(b))
233                 return 1;
234
235         if ((get_irn_op(a) != get_irn_op(b)) ||
236             (get_irn_mode(a) != get_irn_mode(b))) return 1;
237
238         /* compare if a's in and b's in are of equal length */
239         irn_arity_a = get_irn_arity(a);
240         if (irn_arity_a != get_irn_arity(b))
241                 return 1;
242
243         /* blocks are never the same */
244         if (is_Block(a) || is_Block(b))
245                 return 1;
246
247         /* TODO depends on load optimization */
248         if (get_irn_pinned(a) == op_pin_state_pinned) {
249                 /* for pinned nodes, the block inputs must be equal */
250                 if (get_irn_n(a, -1) != get_irn_n(b, -1))
251                         return 1;
252         } else {
253                 /* we need global values independent from their blocks */
254                 assert(get_opt_global_cse());
255         }
256
257         /* compare a->in[0..ins] with b->in[0..ins] */
258         for (i = 0; i < irn_arity_a; ++i) {
259                 ir_node *pred_a = get_irn_n(a, i);
260                 ir_node *pred_b = get_irn_n(b, i);
261                 if (pred_a != pred_b) {
262                         /* if both predecessors are CSE neutral they might be different */
263                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
264                                 return 1;
265                 }
266         }
267
268         /*
269          * here, we already know that the nodes are identical except their
270          * attributes
271          */
272         if (a->op->ops.node_cmp_attr)
273                 return a->op->ops.node_cmp_attr(a, b);
274
275         return 0;
276 }
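/*
 * Together with remember() below this comparison realizes the GVN notion of
 * a value: two expressions are considered equal if they have the same
 * operator, mode and attributes and their operands carry the same values,
 * independent of the blocks the expressions live in.  Phi and Block nodes
 * never compare equal here, so every Phi remains a value of its own.
 */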
277
278 /**
279  * Identify does a lookup in the GVN valuetable.
280  * To be used when no new GVN values are to be created.
281  *
282  * @param irn  a node representing an expression
283  * @return a node representing the value
284  */
285 static ir_node *identify(ir_node *irn)
286 {
287         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
288         if (value)
289                 return value;
290         /* irn represents a new value */
291         return identify_remember(irn);
292 }
293
294 /**
295  * remember() adds node irn to the GVN valuetable.
296  * Identify_remember only identifies values of nodes with the
297  * same predecessor nodes (not values). By creating a node from the predecessor
298  * values, a true valuetree is built. Phis kill their predecessor value,
299  * so no circular dependencies need to be resolved.
300  *
301  * TODO Improvement:
302  *      Maybe this could be implemented with a custom node hash that takes
303  *      phi nodes and true values (instead of predecessors) into account,
304  *      resulting in value numbers.
305  *
306  * @param irn  a node representing an expression
307  * @return     the value of the expression
308  */
309 static ir_node *remember(ir_node *irn)
310 {
311         int       arity   = get_irn_arity(irn);
312         int       i;
313         int       changed = 0;
314         ir_node **in      = XMALLOCN(ir_node *, arity);
315         ir_node  *value;
316
317         DB((dbg, LEVEL_4, "Remember %+F\n", irn));
318
319         for (i = 0; i < arity; ++i) {
320                 ir_node *pred       = get_irn_n(irn, i);
321                 ir_node *pred_value = identify(pred);
322
323                 /* a phi will be translated anyway, so kill the predecessor values;
324                    this also prevents circular dependencies */
325                 if (is_Phi(pred)) {
326                         /* every phi represents its own value */
327                         in[i] = pred;
328                         continue;
329                 }
330
331                 /* predecessor is not its value representation */
332                 if (pred != pred_value)
333                         changed = 1;
334                 in[i] = pred_value;
335         }
336
337         if (changed) {
338                 /* create a representative built from the predecessor values */
339                 ir_node *nn = new_ir_node(
340                         get_irn_dbg_info(irn),
341                         get_irn_irg(irn),
342                         get_nodes_block(irn),
343                         get_irn_op(irn),
344                         get_irn_mode(irn),
345                         get_irn_arity(irn),
346                         in);
347                 copy_node_attr(environ->graph, irn, nn);
348
349                 /* now the value can be determined */
350                 value = identify_remember(nn);
351         } else {
352                 value = identify_remember(irn);
353         }
354         free(in);
355
356         ir_nodehashmap_insert(&value_map, irn, value);
357
358         return value;
359 }
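/*
 * Example (informal): given
 *     x = a + b;   y = a + b;   z = x + c;   w = y + c;
 * x and y receive the same value.  When z and w are remembered, their
 * operands are replaced by the operand values, so both are rebuilt as
 * "value(x) + value(c)" and obtain the same value as well, although their
 * expression nodes differ.
 */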
360
361 /** When the value map has been built we may look up expressions
362  *  and remember them if new.
363  */
364 static ir_node *identify_or_remember(ir_node *irn)
365 {
366         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
367         if (value)
368                 return value;
369         else
370                 return remember(irn);
371 }
372
373 /* --------------------------------------------------------
374  * Block info
375  * --------------------------------------------------------
376  */
377
378 /**
379  * Allocate block info for block block.
380  *
381  * @param block   the block
382  * @param env     the environment
383  */
384 static void alloc_block_info(ir_node *block, pre_env *env)
385 {
386         block_info *info = OALLOC(env->obst, block_info);
387
388         set_irn_link(block, info);
389         info->exp_gen    = ir_valueset_new(16);
390         info->avail_out  = ir_valueset_new(16);
391         info->antic_in   = ir_valueset_new(16);
392         info->antic_done = ir_valueset_new(16);
393         info->trans = XMALLOC(ir_nodehashmap_t);
394         ir_nodehashmap_init(info->trans);
395
396         info->new_set = NULL;
397         info->avail   = NULL;
398         info->block   = block;
399         info->found   = 1;
400
401         info->next = env->list;
402         env->list  = info;
403 }  /* alloc_block_info */
404
405 static void free_block_info(block_info *block_info)
406 {
407         ir_valueset_del(block_info->exp_gen);
408         ir_valueset_del(block_info->avail_out);
409         ir_valueset_del(block_info->antic_in);
            ir_valueset_del(block_info->antic_done); /* allocated in alloc_block_info(), too */
410         if (block_info->trans) {
411                 ir_nodehashmap_destroy(block_info->trans);
412                 free(block_info->trans);
413         }
414         if (block_info->new_set)
415                 ir_valueset_del(block_info->new_set);
416 }
417
418 /**
419  * Bottom up walker that ensures that every block gets a block info.
420  *
421  * @param irn   the node
422  * @param ctx   the environment
423  */
424 static void block_info_walker(ir_node *irn, void *ctx)
425 {
426         if (is_Block(irn)) {
427                 pre_env *env = (pre_env*)ctx;
428                 alloc_block_info(irn, env);
429         }
430 }
431
432 /**
433  * Returns the block info of a block.
434  */
435 static block_info *get_block_info(ir_node *block)
436 {
437         return (block_info*)get_irn_link(block);
438 }
439
440 /* --------------------------------------------------------
441  * Infinite loop analysis
442  * --------------------------------------------------------
443  */
444
445 /**
446  * Walker to set block marks and loop links to 0.
447  */
448 static void clear_block_mark_loop_link(ir_node *block, void *env)
449 {
450         (void) env;
451
452         if (is_Block(block)) {
453                 set_Block_mark(block, 0);
454                 set_loop_link(get_irn_loop(block), NULL);
455         }
456 }
457
458 /**
459  * Returns non-zero if block is part of the real loop loop.
460  */
461
462 static unsigned in_loop(ir_node *block, ir_loop *loop)
463 {
464         ir_loop *l     = get_irn_loop(block);
465         ir_loop *outer = get_irg_loop(environ->graph);
466
467         while (l != loop) {
468                 /* loop tree root is not a loop */
469                 if (l == NULL || l == outer)
470                         return 0;
471                 l = get_loop_outer_loop(l);
472         }
473         return 1;
474 }
475
476 /**
477  * Returns the outermost real loop of loop.
478  */
479 static ir_loop *get_loop_outermost(ir_loop *loop)
480 {
481         ir_loop *outer = get_irg_loop(environ->graph);
482         ir_loop *l     = loop;
483         ir_loop *last  = NULL;
484
485         while (l != outer) {
486                 last = l;
487                 l = get_loop_outer_loop(l);
488         }
489         return last;
490 }
491
492 /**
493  * Topological bottom-up walker that sets the links of infinite loops to non-zero.
494  * Block marks are used to flag blocks reachable from the end block; they are
495  * also set if a block is not part of an infinite loop.
496  */
497 static void infinite_loop_walker(ir_node *block, void *env)
498 {
499         int arity;
500         int i;
501         (void) env;
502
503         if (! is_Block(block))
504                 return;
505
506         /* start block has no predecessors */
507         if (block == environ->start_block)
508                 return;
509
510         arity = get_irn_arity(block);
511
512         /* block not part of a real loop: no infinite loop */
513         if (get_irn_loop(block) == get_irg_loop(environ->graph))
514                 set_Block_mark(block, 1);
515
516         if (get_Block_mark(block)) {
517                 /* reachable block: mark all cf predecessors */
518                 for (i = 0; i < arity; ++i) {
519                         ir_node *pred = get_Block_cfgpred_block(block, i);
520                         if (is_Bad(pred))
521                                 continue;
522                         set_Block_mark(pred, 1);
523                 }
524         } else {
525                 /* We are in a real loop and see an unreachable block. */
526                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
527
528                 /* flag loop as infinite */
529                 set_loop_link(outermost_loop, outermost_loop);
530                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
531
532                 /* The cf predecessors are unreachable, but can never be part of
533                    an infinite loop, because we just reached them. So we set the
534                    blockmark to prevent triggering the infinite loop detection. */
535
536                 /* passing information to the cf predecessors */
537                 for (i = 0; i < arity; ++i) {
538                         ir_node *pred = get_Block_cfgpred_block(block, i);
539
540                         if (is_Bad(pred))
541                                 continue;
542
543                         /* If our cf predecessor is in the same endless loop,
544                            it is also unreachable. */
545                         if (in_loop(pred, outermost_loop)) {
546                                 set_Block_mark(pred, 0);
547                         } else {
548                                 /* When we leave the unreachable loop, we artificially
549                                    declare the cf predecessor reachable. */
550                                 set_Block_mark(pred, 1);
551                         }
552                 }
553         }
554 }
555
556 /**
557  * Sets loop links of outermost infinite loops to non-zero.
558  */
559 static void analyse_loops(ir_graph *irg)
560 {
561         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
562
563         /* reset block mark and loop links */
564         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
565
566         /* mark end block reachable */
567         set_Block_mark(get_irg_end_block(irg), 1);
568         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
569
570         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
571 }
572
573 #if NO_INF_LOOPS || NO_INF_LOOPS2
574 /**
575  * Returns non-zero if block is part of an infinite loop.
576  */
577 static unsigned is_in_infinite_loop(ir_node *block)
578 {
579         ir_loop *loop;
580
581         assert(is_Block(block));
582         loop = get_irn_loop(block);
583         assert(loop);
584
585         loop = get_loop_outermost(loop);
586         if (loop)
587                 return (get_loop_link(loop) != NULL);
588         else
589                 return 0;
590 }
591 #endif
592
593 /* --------------------------------------------------------
594  * GVN-PRE Exp_gen
595  * --------------------------------------------------------
596  */
597
598 /**
599  * Returns non-zero if a node is movable and a possible candidate for PRE.
600  */
601 static unsigned is_nice_value(ir_node *n)
602 {
603         ir_mode *mode = get_irn_mode(n);
604
605         /* positive group */
606
607         if (is_Phi(n))
608                 return 1;
609
610 #if LOADS || DIVMODS
611         if (is_Proj(n))
612                 return 1;
613 #endif
614
615 #if LOADS
616         if (is_Load(n))
617                 return (get_Load_volatility(n) == volatility_non_volatile);
618 #endif
619
620         /* negative group */
621
622         if (get_irn_pinned(n) == op_pin_state_pinned)
623                 return 0;
624
625         if (! mode_is_data(mode)) {
626                 if (! is_Div(n) && ! is_Mod(n))
627                         return 0;
628         }
629         return 1;
630 }
631
632 /**
633  * Checks if a node n is clean in block block for exp_gen.
634  *
635  * @param n      the node
636  * @param block  the block
637  * @return non-zero value for clean node
638  */
639 static unsigned is_clean_in_block_expgen(ir_node *n, ir_node *block)
640 {
641         int i, arity;
642
643         if (is_Phi(n))
644                 return 1;
645
646         if (! is_nice_value(n))
647                 return 0;
648
649         arity = get_irn_arity(n);
650         for (i = 0; i < arity; ++i) {
651                 ir_node *pred = get_irn_n(n, i);
652
653                 /* sufficient for exp_gen because block is always node's block */
654                 if (get_nodes_block(pred) != block)
655                         continue;
656
657                 /* predecessor is in block,
658                    so it needs to be clean */
659                 if (get_irn_link(pred)) {
660                         continue;
661                 } else {
662                         DB((dbg, LEVEL_3, "unclean %+F because pred %+F unclean\n", n, pred));
663                         return 0;
664                 }
665         }
666         return 1;
667 }
668
669 /**
670  * Topological walker that puts nodes into the exp_gen set of their block.
671  * Assumed to walk blockwise and nodewise topologically top-down.
672  *
673  * @param irn    the node
674  * @param ctx    the environment
675  */
676 static void topo_walker(ir_node *irn, void *ctx)
677 {
678         ir_node    *block;
679         block_info *info;
680         ir_node    *value;
681         (void) ctx;
682
683         if (is_Block(irn))
684                 return;
685
686         /* GVN step: remember the value. Predecessors must have been visited already. */
687         value = remember(irn);
688
689         block = get_nodes_block(irn);
690         info  = get_block_info(block);
691
692         /* values for avail_out do not need to be clean */
693         ir_valueset_insert(info->avail_out, value, irn);
694
695         /* no need to put constants into the sets: they are always redundant */
696         if (! is_nice_value(irn) || is_irn_constlike(irn))
697                 return;
698
699         if (is_clean_in_block_expgen(irn, block)) {
700                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
701
702                 ir_valueset_insert(info->exp_gen, value, irn);
703                 /* flag irn as clean */
704                 set_irn_link(irn, irn);
705         } else {
706                 /* flag irn as not clean */
707                 set_irn_link(irn, NULL);
708         }
709 }
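/*
 * After this walk every block's exp_gen holds the clean expressions
 * generated in that block, avail_out holds a first approximation of the
 * available values (it is completed by compute_avail_top_down() below),
 * and value_map assigns a GVN value to every visited node.
 */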
710
711 /* --------------------------------------------------------
712  * GVN-PRE Antic_in
713  * --------------------------------------------------------
714  */
715
716 /**
717  * Gets the result of a node's phi translation into block block.
718  *
719  * @param block  the target block
720  * @param node   the node
721  *
722  * @return a phi translation of node node into block block or NULL
723  */
724 static ir_node *get_translated(ir_node *block, ir_node *node)
725 {
726         if (is_irn_constlike(node))
727                 return node;
728
729         return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
730 }
731
732 /**
733  * Saves the result of the phi translation of a node
734  * in the translation map of the target block.
735  * Constants are not stored because they are always their own value.
736  *
737  * @param map    the translation map of the target block
738  * @param node   the original node
739  * @param trans  the translation result
740  *
741  */
742 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
743 {
744         if (is_irn_constlike(node))
745                 return;
746         /* insert or replace */
747         ir_nodehashmap_insert(map, node, trans);
748 }
749
750 /**
751  * Checks if node is hoistable into block.
752  * A clean node in block block can be hoisted above block succ.
753  * A node whose representative is killed in block block is not clean,
754  * but such a node can still be hoisted into block block.
755  *
756  * @param node   the node to be checked
757  * @param block  the block the node should be hoisted into
758  * @param succ   the successor of block the node stems from
759  * @returns      the block the node can be hoisted above, or NULL
760  */
761 static ir_node *is_hoistable_above(ir_node *node, ir_node *block, ir_node *succ)
762 {
763         int i;
764         int arity = get_irn_arity(node);
765
766         /* make sure that node can be hoisted above block */
767
768         if (is_irn_constlike(node))
769                 return block;
770
771         for (i = 0; i < arity; ++i) {
772                 block_info *info       = get_block_info(block);
773                 ir_node    *pred       = get_irn_n(node, i);
774                 ir_node    *pred_value = identify(pred);
775                 ir_node    *pred_block = get_nodes_block(pred);
776
777                 /* remove TMP_GEN */
778                 /* is predecessor not known to be clean */
779                 if (! ir_valueset_lookup(info->antic_in, pred_value)) {
780                         /* is it possible to hoist node above block? */
781                         if (! block_strictly_dominates(pred_block, block)) {
782                                 DB((dbg, LEVEL_3, "unclean %+F: %+F (%+F) not antic\n", node, pred, pred_value));
783                                 return succ;
784                         }
785                 }
786
787                 /* the predecessor value being antic_in is not enough:
788                    if we did not translate the exact representative we cannot translate */
789                 if (! get_translated(pred_block, pred)) {
790                         if (! block_strictly_dominates(pred_block, block)) {
791                                 DB((dbg, LEVEL_3, "unclean %+F: %+F (%+F) lost representative\n", node, pred, pred_value));
792                                 return NULL;
793                         }
794                 }
795         }
796         return block;
797 }
798
799 #if LOADS || DIVMODS
800 /* Helper function to compare the values of pred and avail_pred. */
801 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
802 {
803         ir_node *avail_value  = identify(avail_pred);
804         ir_node *trans_pred   = get_translated_pred(pred, block, pos);
805         ir_node *value;
806
807         if (trans_pred == NULL)
808                 trans_pred = pred;
809         value = identify(trans_pred);
810
811         DB((dbg, LEVEL_3, "manual compare %+F  %+F\n", pred, avail_pred));
812
813         return (value == avail_value);
814 }
815 #endif
816
817 #if LOADS
818 /**
819  * Does phi translation for redundant load nodes only.
820  * Returns NULL for non-redundant loads, which need to be phi translated.
821  * Loads are compared by comparing their pointer values,
822  * and ensuring that they are adjacent.
823  * This is equivalent to what phi_translate() does
824  * when a new node is created and then GCSE-optimized, resulting
825  * in an already available node.
826  */
827 static ir_node *phi_translate_load(ir_node *load, ir_node *block, int pos)
828 {
829         ir_node *mem   = get_Load_mem(load);
830         ir_node *trans = get_translated_pred(mem, block, pos);
831
832         if (trans == NULL)
833                 trans = mem;
834
835         /* no partial redundancy if this is a mode_M phi */
836         if (is_Proj(trans)) {
837                 /* The last memory operation in predecessor block */
838                 ir_node *avail_load = get_Proj_pred(trans);
839
840                 /* memop is a load with matching type */
841                 if (is_Load(avail_load) &&
842                             get_Load_mode(load) == get_Load_mode(avail_load)) {
843
844                         unsigned match = match_pred(get_Load_ptr(load), get_Load_ptr(avail_load), block, pos);
845
846                         if (match)
847                                 return avail_load;
848                 }
849         }
850         return NULL;
851 }
852 #endif
853
854 #if DIVMODS
855 /**
856  * Does phi translation for redundant Div/Mod nodes only.
857  * Returns NULL for non-redundant node, which needs to be phi translated.
858  */
859 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
860 {
861         ir_node *mem   = get_memop_mem(divmod);
862         ir_node *trans = get_translated_pred(mem, block, pos);
863
864         if (trans == NULL)
865                 trans = mem;
866
867         /* no partial redundancy if this is a mode_M phi */
868         if (is_Proj(trans)) {
869                 /* The last memory operation in predecessor block */
870                 ir_node *avail_op = get_Proj_pred(trans);
871
872                 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
873                         unsigned left, right;
874
875                         if (is_Div(avail_op)) {
876                                 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
877                                     get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
878
879                                         left  = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
880                                         right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
881
882                                         if (left && right)
883                                                 return avail_op;
884                                 }
885                         } else if (is_Mod(avail_op)) {
886                             if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
887
888                                         left  = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
889                                         right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
890
891                                         if (left && right)
892                                                 return avail_op;
893                                 }
894                         }
895                 }
896         }
897         return NULL;
898 }
899 #endif
900
901 /**
902  * Translates an expression above a Phi.
903  *
904  * @param node        the node
905  * @param block       the block whose phis are used for the translation
906  * @param pos         the position of the predecessor block among block's inputs
907  * @param pred_block  the predecessor block the node is translated into
908  * @return a node representing the translated value
909  */
910 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_node *pred_block)
911 {
912         int       i;
913         int       arity;
914         ir_node **in;
915         ir_node  *nn;
916         int       needed;
917
918         if (is_Phi(node)) {
919                 if (get_nodes_block(node) == block)
920                         return get_Phi_pred(node, pos);
921                 /* this phi does not need translation */
922                 return node;
923         }
924
925         arity = get_irn_arity(node);
926
927 #if LOADS
928         if (is_Load(node)) {
929                 ir_node *avail_load = phi_translate_load(node, block, pos);
930                 if (avail_load)
931                         return avail_load;
932         }
933 #endif
934
935 #if DIVMODS
936         if (is_Div(node) || is_Mod(node)) {
937                 ir_node *avail_op = phi_translate_divmod(node, block, pos);
938                 if (avail_op)
939                         return avail_op;
940         }
941 #endif
942
943         needed = 0;
944         /* insert phase enforces translation for previously not translated nodes */
945         if (environ->insert_phase)
946                 needed = 1;
947
948         in = XMALLOCN(ir_node *, arity);
949
950         for (i = 0; i < arity; ++i) {
951                 ir_node *pred       = get_irn_n(node, i);
952                 /* we cannot find this value in antic_in, because the value
953                    has (possibly) changed! */
954                 ir_node *pred_trans = get_translated(pred_block, pred);
955                 ir_node *expr;
956
957                 if (pred_trans == NULL) {
958                         /* reasons for this are:
959                            1. pred not dominated by block: use predecessor.
960                            2. dangling representative: predecessor not translated.
961                            We cannot phi translate; it would be the wrong value. */
962                         expr = pred;
963                 } else {
964                         expr = pred_trans;
965                         /* predecessor value changed, so translation is needed */
966                         if (identify(expr) != identify(pred))
967                                 needed |= 1;
968                 }
969
970                 /* We need to build a value tree. But values cannot be translated,
971                    expressions can be. So we translate the expressions and let GVN
972                    identify their values. */
973                 in[i] = expr;
974         }
975
976         if (! needed)
977                 return node;
978
979         DB((dbg, LEVEL_3, "Translate\n"));
980         /* copy node to represent the new value.
981            We do not translate nodes that do not need translation,
982            so we use the newly created nodes as value representatives only.
983            Their block is not important, because we create new ones during
984            insert_nodes(). */
985         nn = new_ir_node(
986                 get_irn_dbg_info(node),
987                 environ->graph,
988                 get_Block_cfgpred_block(block, pos),
989                 get_irn_op(node),
990                 get_irn_mode(node),
991                 arity,
992                 in);
993         free(in);
994         /* We need the attribute copy here, because the Hash value of a
995            node might depend on it. */
996         copy_node_attr(environ->graph, node, nn);
997         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
998            because it already uses availability. */
999
1000         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
1001         return nn;
1002 }
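/*
 * Example (informal), assuming the operand has already been translated:
 * let block succ contain
 *     x = Phi(x1, x2);   e = x + y;
 * with y defined further up.  Translating e over the control flow edge at
 * pos 0 replaces the operand x by x1 (a Phi of succ translates to its
 * predecessor at pos), so a new node x1 + y is built; the caller then looks
 * up or remembers its value in the predecessor block.
 */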
1003
1004 /**
1005  * Block-walker, computes Antic_in(block).
1006  * Builds a value tree out of the graph by translating values
1007  * over phi nodes.
1008  * Although the algorithm works on values, constructing the value tree
1009  * depends on actual representations through nodes and their actual
1010  * predecessors.
1011  * By using only one representative (GVN-PRE) for every value, we have to be
1012  * careful not to break the topological order during translation. If a node
1013  * needs to be translated, but its predecessor - a representative in the same
1014  * antic_in scope - has not been, then it has to be killed.
1015  * one-representative-problem: Two blocks yield the same value through
1016  *    completely different calculations. The value is common, but the
1017  *    representative cannot be phi translated, because the predecessors
1018  *    haven't been.
1019  *    If a value is in exp_gen and also in antic_in of the successor,
1020  *    a similar situation sets in.
1021  *
1022  * By using antic_in exclusively, we lose information about translated nodes.
1023  * We need to permanently keep the translated nodes list.
1024  * For insert_nodes() we actually need antic_out. But antic_out is not usable,
1025  * because every value had to be cross-looked up in every available set.
1026  *
1027  * If we used antic_out, the translated nodes list would not be necessary
1028  * permanently, because instead of looking for translated(antic_in) we could
1029  * just use antic_out of the predecessor block.
1030  *
1031  * @param block  the block
1032  * @param ctx    the walker environment
1033  */
1034 static void compute_antic(ir_node *block, void *ctx)
1035 {
1036         pre_env                *env       = (pre_env*)ctx;
1037         block_info             *succ_info;
1038         block_info             *info;
1039         ir_node                *succ;
1040         ir_node                *value;
1041         ir_node                *expr;
1042         size_t                  size;
1043         ir_valueset_iterator_t  iter;
1044         int                     n_succ;
1045
1046         /* filter blocks from topological walker */
1047         if (! is_Block(block))
1048                 return;
1049
1050         /* the end block has no successor */
1051         if (block == env->end_block)
1052                 return;
1053
1054         info = get_block_info(block);
1055         /* track changes */
1056         size = ir_valueset_size(info->antic_in);
1057         n_succ = get_Block_n_cfg_outs(block);
1058
1059         if (env->first_iter) {
1060                 foreach_valueset(info->exp_gen, value, expr, iter) {
1061                         ir_valueset_insert(info->antic_in, value, expr);
1062                 }
1063         }
1064
1065         /* successor might have phi nodes */
1066         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1067                 succ      = get_Block_cfg_out(block, 0);
1068                 int pos   = get_Block_cfgpred_pos(succ, block);
1069                 succ_info = get_block_info(succ);
1070
1071                 /* initialize translated set */
1072                 if (env->first_iter) {
1073                         info->trans = XMALLOC(ir_nodehashmap_t);
1074                         ir_nodehashmap_init(info->trans);
1075                 }
1076
1077                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1078                         /* we translate the expression over the phi node,
1079                            remember() builds the value tree */
1080                         ir_node *trans       = phi_translate(expr, succ, pos, block);
1081                         /* create new value if necessary */
1082                         ir_node *trans_value = identify_or_remember(trans);
1083                         ir_node *represent;
1084                         ir_node *hoistable;
1085
1086                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1087
1088                         /* on value change (phi present) we need the translated node
1089                            to represent the new value for possible further translation. */
1090                         if (value != trans_value)
1091                                 represent = trans;
1092                         else
1093                                 represent = expr;
1094
1095                         hoistable = is_hoistable_above(expr, block, succ);
1096                         //if (is_clean_in_block_antic(expr, block)) {
1097                         if (hoistable != NULL) {
1098                                 /* Where is the difference between replace and insert?
1099                                    If we replace, we might use a representative with non translated */
1100                                 ir_valueset_insert(info->antic_in, trans_value, represent);
1101                                 DB((dbg, LEVEL_3, "Translated %+F repr %+F\n", expr, represent));
1102
1103                                 if (hoistable == block)
1104                                         ir_valueset_set_link(info->antic_in, trans_value, trans_value);
1105                                 else
1106                                         /* hoistable into block but not above */
1107                                         ir_valueset_set_link(info->antic_in, trans_value, NULL);
1108                         }
1109                         /* during insert we use the translated node, because it may be
1110                            hoisted into block while not being anticipated there. */
1111                         set_translated(info->trans, expr, represent);
1112                 }
1113         } else if (n_succ > 1) {
1114                 int         i;
1115                 ir_node    *common     = NULL;
1116                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1117                 block_info *succ0_info = get_block_info(succ0);
1118
1119                 /* disjoint of antic_ins */
1120                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1121                         ir_node *hoistable;
1122
1123                         /* iterate over remaining successors */
1124                         for (i = 1; i < n_succ; ++i) {
1125                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1126                                 block_info *succ_info = get_block_info(succ);
1127
1128                                 /* value in antic_in? */
1129                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1130                                 if (common == NULL)
1131                                         break;
1132                         }
1133
1134                         hoistable = is_hoistable_above(expr, block, succ0);
1135
1136                         if (common && hoistable != NULL) {
1137                                 ir_valueset_insert(info->antic_in, value, expr);
1138
1139                                 if (hoistable == block)
1140                                         ir_valueset_set_link(info->antic_in, value, value);
1141                                 else
1142                                         /* hoistable into block but not above */
1143                                         ir_valueset_set_link(info->antic_in, value, NULL);
1144
1145                                 DB((dbg, LEVEL_3, "common and clean %+F(%+F) in %+F\n", expr, value, block));
1146                         }
1147                 }
1148         }
1149
1150         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1151
1152         if (size != ir_valueset_size(info->antic_in))
1153                 env->changes |= 1;
1154 }
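/*
 * In the notation of VanDrunen and Hosking this roughly computes, per block b:
 *     Antic_out(b) = phi_translate(Antic_in(succ(b)))            (one successor)
 *     Antic_out(b) = { e in Antic_in(succ0) | value(e) is
 *                      anticipated in every other successor }    (several successors)
 *     Antic_in(b)  = clean(Antic_out(b) ∪ Exp_gen(b))
 * where clean() corresponds to the hoistability check above and the system
 * is iterated until no antic_in set grows any more.
 */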
1155
1156 /* --------------------------------------------------------
1157  * Main algorithm Avail_out
1158  * --------------------------------------------------------
1159  */
1160
1161 /**
1162  * Computes Avail_out(block):
1163  *
1164  * Avail_in(block)  = Avail_out(dom(block))
1165  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1166  *
1167  * Precondition:
1168  *  This function must be called in the top-down topological order:
1169  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1170  *
1171  * @param block   the block
1172  * @param ctx     walker context
1173  */
1174 static void compute_avail_top_down(ir_node *block, void *ctx)
1175 {
1176         pre_env    *env   = (pre_env*)ctx;
1177         block_info *info;
1178
1179         if (block == env->end_block)
1180                 return;
1181
1182         info  = get_block_info(block);
1183
1184         /* Add all nodes from the immediate dominator.
1185            This ensures that avail_out contains the leader. */
1186         if (block != env->start_block) {
1187                 ir_node                *dom_block = get_Block_idom(block);
1188                 block_info             *dom_info  = get_block_info(dom_block);
1189                 ir_node                *value;
1190                 ir_node                *expr;
1191                 ir_valueset_iterator_t  iter;
1192
1193                 foreach_valueset(dom_info->avail_out, value, expr, iter) {
1194                         /* use first available expr as leader */
1195                         ir_valueset_replace(info->avail_out, value, expr);
1196                 }
1197         }
1198
1199         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1200 }
1201
1202 /* --------------------------------------------------------
1203  * Main algorithm redundancy detection
1204  * --------------------------------------------------------
1205  */
1206
1207 /**
1208  * Flags node irn redundant depending on redundant parameter.
1209  */
1210 static void flag_redundant(ir_node *irn, unsigned redundant)
1211 {
1212         if (redundant) {
1213                 set_irn_link(irn, irn);
1214         } else {
1215                 set_irn_link(irn, NULL);
1216         }
1217 }
1218
1219 /**
1220  * Returns redundant flag of node irn.
1221  */
1222 static unsigned is_redundant(ir_node *irn)
1223 {
1224         return (get_irn_link(irn) != NULL);
1225 }
1226
1227 /**
1228  * Returns a valid mode if the value of expr is a partially redundant value.
1229  *
1230  * @param block   the block
1231  * @param expr    the expression
1232  * @param value   the value of the expression
1233  * @return mode of the expression if it is partially redundant else NULL
1234  */
1235 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1236 {
1237         ir_node *first_avail         = NULL;
1238         int      pos;
1239         int      arity               = get_irn_arity(block);
1240         int      fully_redundant     = 1;
1241         int      partially_redundant = 0;
1242         ir_mode *mode                = NULL;
1243
1244         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1245
1246         /* for each predecessor blocks */
1247         for (pos = 0; pos < arity; ++pos) {
1248                 block_info *pred_info;
1249                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1250                 ir_node    *trans_expr;
1251                 ir_node    *trans_value;
1252                 ir_node    *avail_expr;
1253
1254                 /* ignore bad blocks. */
1255                 if (is_Bad(pred_block))
1256                         continue;
1257
1258                 pred_info  = get_block_info(pred_block);
1259                 trans_expr = get_translated(pred_block, expr);
1260                 trans_value = identify(trans_expr);
1261
1262                 /* value might be available through a constant */
1263                 if (is_irn_constlike(trans_expr)) {
1264                         avail_expr = trans_expr;
1265                         if (get_irn_idx(trans_expr) > environ->last_idx) {
1266                                 /* limit new constants */
1267                                 ir_mode   *cmode  = get_irn_mode(trans_expr);
1268                                 ir_tarval *upper = new_tarval_from_long(127, cmode);
1269                                 ir_tarval *lower = new_tarval_from_long(-127, cmode);
1270                                 ir_tarval *c     = get_Const_tarval(trans_expr);
1271
1272                                 if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
1273                                     tarval_cmp(c, upper) != ir_relation_greater_equal) {
1274                                         avail_expr = NULL;
1275                                 }
1276                         }
1277                 } else {
1278                         avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1279                 }
1280                 DB((dbg, LEVEL_3, "avail_expr %+F\n", avail_expr));
1281
1282                 if (avail_expr == NULL) {
1283                         pred_info->avail = trans_expr;
1284                         pred_info->found = 0;
1285                         fully_redundant  = 0;
1286                 } else {
1287                         /* expr is available */
1288                         pred_info->avail    = avail_expr;
1289                         pred_info->found    = 1;
1290                         mode                = get_irn_mode(avail_expr);
1291                         partially_redundant = 1;
1292
1293                         if (first_avail == NULL)
1294                                 first_avail = avail_expr;
1295                         else if (first_avail != avail_expr)
1296                                 /* Multiple different expressions are available */
1297                                 fully_redundant = 0;
1298
1299                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1300                 }
1301         }
1302
1303 #if BETTER_GREED
1304         /* value is redundant from last iteration,
1305            but has not been removed from antic_in (is not optimized) */
1306         if (! environ->first_iter && is_redundant(expr))
1307                 return mode;
1308 #endif
1309
1310         /* If it is not the same value already existing along every predecessor
1311            and it is defined by some predecessor, then it is partially redundant. */
1312         if (! partially_redundant || fully_redundant)
1313                 return NULL;
1314         return mode;
1315 }
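/*
 * Example (informal): for a block B3 with predecessors B1 and B2 where only
 * B2 computes a + b,
 *       B1              B2: t = a + b
 *         \            /
 *          B3: u = a + b
 * the value of a + b is available in B2 but not in B1, so u is partially
 * (but not fully) redundant in B3: avail/found are recorded per predecessor
 * and insert_nodes() later places a copy of a + b in B1 and a Phi over the
 * two predecessor values in B3.
 */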
1316
1317 /**
1318  * Updates the new_set of a block by adding the new_set of
1319  * the immediate dominating block.
1320  * @param block  the block
1321  * @param idom   the immediate dominator of the block
1322  */
1323 static void update_new_set(ir_node *block, ir_node *idom)
1324 {
1325         ir_node                *value;
1326         ir_node                *expr;
1327         ir_valueset_iterator_t  iter;
1328         block_info             *curr_info = get_block_info(block);
1329         block_info             *idom_info = get_block_info(idom);
1330         int                     updated   = 0;
1331
1332         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1333         foreach_valueset(idom_info->new_set, value, expr, iter) {
1334                 /* inherit new_set from immediate dominator */
1335                 ir_valueset_insert(curr_info->new_set, value, expr);
1336                 /* replace in avail_out */
1337                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1338         }
1339 #ifdef DEBUG_libfirm
1340         if (updated)
1341                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1342 #endif
1343 } /* update_new_set */
1344
1345 /**
1346  * Checks if hoisting irn is greedy.
1347  * Greedy hoisting means that non-partially-redundant nodes are
1348  * hoisted as well. This happens if a partially redundant node has
1349  * non-redundant predecessors.
1350  */
1351 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1352 {
1353         int arity = get_irn_arity(irn);
1354         int i;
1355
1356         for (i = 0; i < arity; ++i) {
1357                 ir_node *pred  = get_irn_n(irn, i);
1358
1359                 if (! block_strictly_dominates(get_nodes_block(pred), block) && ! is_redundant(pred))
1360                         return 1;
1361         }
1362         return 0;
1363 }
1364
1365 /**
1366  * Perform insertion of partially redundant values.
1367  * For every block node, do the following:
1368  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1369  * If the block has multiple predecessors,
1370  *     2a. Iterate over the ANTIC expressions for the block to see if
1371  *         any of them are partially redundant.
1372  *     2b. If so, insert them into the necessary predecessors to make
1373  *         the expression fully redundant.
1374  *     2c. Insert a new Phi merging the values of the predecessors.
1375  *     2d. Insert the new Phi, and the new expressions, into the
1376  *         NEW_SETS set.
1377  *
1378  * @param block  the block
1379  * @param ctx    the walker environment
1380  */
1381 static void insert_nodes(ir_node *block, void *ctx)
1382 {
1383         pre_env                *env    = (pre_env*)ctx;
1384         int                     arity  = get_irn_arity(block);
1385         ir_node                *value;
1386         ir_node                *expr;
1387         block_info             *info;
1388         ir_node                *idom;
1389         int                     pos;
1390         ir_valueset_iterator_t  iter;
1391
1392         /* only blocks */
1393         if (! is_Block(block))
1394                 return;
1395
1396         /* ensure that even the start block has a new_set */
1397         info = get_block_info(block);
1398         if (info->new_set)
1399                 ir_valueset_del(info->new_set);
1400         info->new_set = ir_valueset_new(16);
1401
1402         if (block == env->start_block)
1403                 return;
1404
1405         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1406
1407         idom = get_Block_idom(block);
1408         update_new_set(block, idom);
1409
1410         /* process only path joining blocks */
1411         if (arity < 2) {
1412                 return;
1413         }
1414
1415 #if BETTER_GREED
1416         plist_t *stack = plist_new();
1417 #endif
1418
1419         /* This is the main reason we choose antic_in over antic_out:
1420            we can iterate over every anticipated value directly instead
1421            of iterating over the predecessor blocks. */
1422         foreach_valueset(info->antic_in, value, expr, iter) {
1423                 ir_mode  *mode;
1424                 ir_node  *phi;
1425                 ir_node **phi_in;
1426
1427                 /* already done? */
1428                 if (ir_valueset_lookup(info->antic_done, value))
1429                         continue;
1430
1431                 /* hoistable above block? */
1432                 if (ir_valueset_get_link(info->antic_in, value) == NULL)
1433                         continue;
1434
1435 #if BETTER_GREED
1436                 plist_insert_front(stack, expr);
1437 #endif
1438
1439                 /* filter phi nodes from antic_in */
1440                 if (is_Phi(expr)) {
1441                         flag_redundant(expr, 1);
1442                         continue;
1443                 }
1444
1445                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1446
1447                 /* A value computed in the dominator is totally redundant.
1448                    Hence we have nothing to insert. */
1449                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1450                         flag_redundant(expr, 1);
1451                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1452                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1453
1454                         continue;
1455                 }
1456
1457 #if !BETTER_GREED
1458                 if (is_hoisting_greedy(expr, block))
1459                         continue;
1460 #endif
1461
1462                 mode = is_partially_redundant(block, expr, value);
1463                 if (mode == NULL) {
1464                         flag_redundant(expr, 0);
1465                         continue;
1466                 } else {
1467                         flag_redundant(expr, 1);
1468                 }
1469
1470 #if BETTER_GREED
1471                 if (is_hoisting_greedy(expr, block))
1472                         continue;
1473 #endif
1474
1475 #if LOADS || DIVMODS
1476                 /* save old mode_M phis to remove keepalive edges later */
1477                 if (is_memop(expr)) {
1478                         ir_node *mem = get_memop_mem(expr);
1479                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1480                                 ir_nodeset_insert(env->keeps, mem);
1481                         }
1482                 }
1483 #endif
1484
1485 #ifdef DEBUG_libfirm
1486                 if (! is_Proj(expr)) {
1487                         if (env->first_iter)
1488                                 inc_stats(gvnpre_stats->first_iter_found);
1489                         inc_stats(gvnpre_stats->partially);
1490                 }
1491                 if (is_Load(expr))
1492                         inc_stats(gvnpre_stats->loads);
1493                 else if (is_Div(expr) || is_Mod(expr))
1494                         inc_stats(gvnpre_stats->divmods);
1495 #endif
1496
1497                 phi_in = XMALLOCN(ir_node *, arity);
1498
1499                 /* for each predecessor block */
1500                 for (pos = 0; pos < arity; ++pos) {
1501                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1502                         block_info *pred_info;
1503
1504                         /* ignore bad blocks. */
1505                         if (is_Bad(pred_block)) {
1506                                 ir_graph *irg = get_irn_irg(pred_block);
1507                                 phi_in[pos] = new_r_Bad(irg, mode);
1508                                 continue;
1509                         }
1510                         pred_info = get_block_info(pred_block);
1511
1512                         if (! pred_info->found) {
1513                                 ir_node *trans = get_translated(pred_block, expr);
1514
1515                                 assert(trans);
1516                                 if (trans == expr) {
1517                         /* The expression has only been translated if an ancestor was a
1518                            Phi; expressions without Phi ancestors can be redundant as
1519                            well but have not been phi-translated yet, so do it now. */
1520                                         trans = phi_translate(expr, block, pos, pred_block);
1521                                         set_translated(pred_info->trans, expr, trans);
1522                                 }
1523
1524                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F not available\n", trans, pred_block, expr));
1525
1526                                 /* The value is now available in the predecessor block through
1527                                    trans; insert (not replace) because it was not available before. */
1528                                 ir_valueset_insert(pred_info->avail_out, value, trans);
1529                                 phi_in[pos] = trans;
1530                         } else {
1531                                 /* value available */
1532                                 phi_in[pos] = pred_info->avail;
1533                         }
1534                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1535                 }
1536
1537                 /* We do not connect tuples as they will be connected automatically
1538                    by the corresponding projections. */
1539                 if (get_irn_mode(expr) != mode_T) {
1540
1541                         phi = new_r_Phi(block, arity, phi_in, mode);
1542                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1543
1544                         /* this value is now available through the new phi */
1545                         ir_valueset_replace(info->avail_out, value, phi);
1546                         ir_valueset_insert(info->new_set, value, phi);
1547                 }
1548                 free(phi_in);
1549
1550 #if 0
1551                 /* remove from antic_in, because expr is not anticipated
1552                    anymore in this block */
1553                 ir_valueset_remove_iterator(info->antic_in, &iter);
1554 #endif
1555                 ir_valueset_insert(info->antic_done, value, expr);
1556
1557                 env->changes |= 1;
1558         }
1559
1560 #if BETTER_GREED
1561         if (env->changes) {
1562                 /* iterate in inverse topological order */
1563                 plist_element_t *it;
1564                 foreach_plist(stack, it) {
1565                         ir_node *irn = (ir_node *)plist_element_get_value(it);
1566                         int j;
1567                         char redundant = 1;
1568                         ir_node *block = get_nodes_block(irn);
1569
1570                         /* does irn only have redundant successors? */
1571
1572                         if (! get_irn_outs_computed(irn))
1573                                 continue;
1574
1575                         for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
1576                                 ir_node *succ = get_irn_out(irn, j);
1577
1578                                 /* if succ and irn are in the same block */
1579                                 if (get_nodes_block(succ) == block && is_redundant(succ)) {
1580                                         continue;
1581                                 } else {
1582                                         redundant = 0;
1583                                         break;
1584                                 }
1585                         }
1586
1587                         if (redundant)
1588                                 flag_redundant(irn, 1);
1589                 }
1590         }
1591         plist_free(stack);
1592 #endif
1593
1594 }
1595
1596 #if HOIST_HIGH
1597 /**
1598  * Dom tree block walker that moves nodes made available by insert_nodes()
1599  * into the highest possible block in which their value is still anticipated.
1600  *
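 * Rough sketch of what happens for every value optimized in this block:
 * the value is phi-translated into each predecessor, its available leader
 * is looked up, and the dominator chain above the predecessor is searched
 * for a higher block that is not in a deeper loop and still anticipates
 * the value; if no operand definition or conflicting use of the value lies
 * on that path, the available node is moved into the new block.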
1601  */
1602 static void hoist_high(ir_node *block, void *ctx)
1603 {
1604         pre_env                *env        = (pre_env*)ctx;
1605         block_info             *curr_info;
1606         ir_valueset_iterator_t  iter;
1607         ir_node                *expr;
1608         ir_node                *value;
1609         int                     arity      = get_irn_arity(block);
1610
1611         if (! is_Block(block))
1612                 return;
1613
1614         if (arity < 2)
1615                 return;
1616
1617         if (block == env->start_block)
1618                 return;
1619
1620         curr_info = get_block_info(block);
1621
1622         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1623
1624         /* foreach entry optimized by insert_nodes */
1625         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1626                 int pos;
1627
1628                 if (is_memop(expr) || is_Proj(expr))
1629                         continue;
1630
1631                 /* for each path to block */
1632                 for (pos = 0; pos < arity; ++pos) {
1633                         /* standard target is predecessor block */
1634                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1635                         block_info *pred_info  = get_block_info(target);
1636                         ir_node    *avail;
1637                         ir_node    *new_target;
1638                         ir_node    *last_target;
1639                         ir_node    *trans_expr;
1640                         ir_node    *trans_value;
1641                         ir_node    *dom;
1642                         int         avail_arity;
1643                         int         i;
1644                         unsigned    nest_depth;
1645
1646                         /* get phi translated value */
1647                         trans_expr  = get_translated(target, expr);
1648                         trans_value = identify(trans_expr);
1649                         avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1650
1651                         if (avail == NULL)
1652                                 avail = trans_expr;
1653
1654                         value = identify(avail);
1655                         avail_arity = get_irn_arity(avail);
1656
1657                         /* anticipation border */
1658                         new_target  = NULL;
1659                         last_target = NULL;
1660                         nest_depth  = get_loop_depth(get_irn_loop(target));
1661                         dom         = target;
1662
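                        /* Walk up the dominator chain starting at the predecessor block;
                           stop at the start block, before entering a deeper loop, or once
                           the value is no longer anticipated.  The last block visited
                           becomes the hoisting candidate. */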
1663                         while(dom != environ->start_block) {
1664                                 dom = get_Block_idom(dom);
1665
1666                                 if (is_Bad(dom))
1667                                         break;
1668
1669                                 /* do not hoist into more deeply nested loops */
1670                                 if (get_loop_depth(get_irn_loop(dom)) > nest_depth)
1671                                         break;
1672
1673                                 if (get_loop_depth(get_irn_loop(dom)) < nest_depth)
1674                                         last_target = dom;
1675
1676                                 nest_depth = get_loop_depth(get_irn_loop(dom));
1677
1678                                 /* being in antic_in means the expression is clean and may be
1679                                    hoisted above the contents of dom, but still into the block */
1680                                 new_target = dom;
1681                                 /* check if the available node is still anticipated and clean
1682                                    (clean is part of antic) */
1683                                 if (! ir_valueset_lookup(get_block_info(dom)->antic_in, value))
1684                                         break;
1685                         }
1686
1687                         /* No new target or does the available node already dominate the new_target? */
1688                         if (new_target) {
1689                                 DB((dbg, LEVEL_2, "leader block %+F\n", get_nodes_block(avail)));
1690                                 /* is avail already in the same block or in a dominating one? */
1691                                 if (block_strictly_dominates(get_nodes_block(avail), new_target))
1692                                         new_target = NULL;
1693                         }
1694
1695                         DB((dbg, LEVEL_2, "dom border %+F\n", new_target));
1696
1697                         /* check the operands of the available node and their uses on our path */
1698                         for (i = 0; i < avail_arity; i++) {
1699                                 ir_node *pred       = get_irn_n(avail, i);
1700                                 ir_node *pred_block = get_nodes_block(pred);
1701                                 int      j;
1702
1703                                 if (new_target == NULL)
1704                                         break;
1705
1706                                 if (! get_irn_outs_computed(pred)) {
1707                                         new_target = NULL;
1708                                         break;
1709                                 }
1710
1711                                 /* an operand must dominate the hoist target; otherwise lower the target */
1712                                 if (! block_strictly_dominates(pred_block, new_target)) {
1713                                         new_target = pred_block;
1714                                 }
1715
1716                                 /* check all users (outs) of the operand */
1717                                 for (j = get_irn_n_outs(pred) - 1; j >= 0; --j) {
1718                                         ir_node *succ = get_irn_out(pred, j);
1719
1720                                         /* does succ lie on our path, i.e. in a block dominating the new target? */
1722                                         if (block_strictly_dominates(get_nodes_block(succ), new_target)) {
1723                                                 ir_node *succ_value = identify(succ);
1724
1725                                                 /* a use computing a different value is harmless */
1726                                                 if (succ_value != value) {
1727                                                         continue;
1728                                                 } else {
1729                                                         new_target = NULL;
1730                                                         break;
1731                                                 }
1732                                         }
1733                                 }
1734                         }
1735
1736                         /* no conflicting use on our path, so the node can be hoisted */
1737                         if (new_target) {
1738                                 /* push the available node up into the new target block */
1739                                 set_nodes_block(avail, new_target);
1740
1741                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1742                         }
1743                 }
1744         }
1745 }
1746 #endif
1747
1748 /* --------------------------------------------------------
1749  * Elimination of fully redundant nodes
1750  * --------------------------------------------------------
1751  */
1752
1753 /**
1754  * Walker which finds redundant nodes using avail_out sets
1755  * and exchanges them for existing ones.
1756  * We cannot change the graph here as this would affect
1757  * the hash values of the nodes.
1758  *
1759  * @param irn  the node
1760  * @param ctx  the walker environment
1761  */
1762 static void eliminate(ir_node *irn, void *ctx)
1763 {
1764         pre_env *env = (pre_env*)ctx;
1765
1766         if (! is_Block(irn)) {
1767                 ir_node    *block = get_nodes_block(irn);
1768                 block_info *info  = get_block_info(block);
1769                 ir_node    *value = identify(irn);
1770
1771                 if (value != NULL) {
1772                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1773
1774                         if (expr != NULL && expr != irn) {
1775                                 elim_pair *p = OALLOC(env->obst, elim_pair);
1776
1777                                 p->old_node = irn;
1778                                 p->new_node = expr;
1779                                 p->next     = env->pairs;
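                                /* expr with an index not below last_idx was created by the
                                   insertion phase, so only a partial redundancy was removed */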
1780                                 if (get_irn_idx(expr) >= env->last_idx)
1781                                         p->reason = FS_OPT_GVN_PARTLY;
1782                                 else
1783                                         p->reason = FS_OPT_GVN_FULLY;
1784                                 env->pairs = p;
1785                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1786                         }
1787                 }
1788         }
1789 }  /* eliminate */
1790
1791 /**
1792  * Do all the recorded changes and optimize
1793  * newly created Phi's.
1794  *
1795  * @param pairs  list of elimination pairs
 * @param keeps  set of mode_M Phis whose keep-alive edges will be removed
1796  */
1797 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1798 {
1799         elim_pair             *p;
1800         ir_nodeset_iterator_t iter;
1801         ir_node               *m_phi;
1802         ir_node               *end = environ->end_node;
1803
1804         for (p = pairs; p != NULL; p = p->next) {
1805                 /* might be already changed */
1806                 p->new_node = skip_Id(p->new_node);
1807
1808                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1809
1810                 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1811                  * which we can optimize here */
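                /* e.g. for old_node y and new_node Phi(y, x, y) the Phi collapses
                   to x before the exchange below */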
1812                 if (is_Phi(p->new_node)) {
1813                         int      i;
1814                         ir_node *res = NULL;
1815
1816                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1817                                 ir_node *pred = get_irn_n(p->new_node, i);
1818
1819                                 if (pred != p->old_node) {
1820                                         if (res) {
1821                                                 res = NULL;
1822                                                 break;
1823                                         }
1824                                         res = pred;
1825                                 }
1826                         }
1827                         if (res) {
1828                                 exchange(p->new_node, res);
1829                                 p->new_node = res;
1830                         }
1831                 }
1832                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1833
1834                 exchange(p->old_node, p->new_node);
1835         }
1836
1837         /* remove keep alive edges of unused mode_M phis */
1838         foreach_ir_nodeset(keeps, m_phi, iter) {
1839                 remove_End_keepalive(end, m_phi);
1840         }
1841 }  /* eliminate_nodes */
1842
1843 /* --------------------------------------------------------
1844  * GVN PRE pass
1845  * --------------------------------------------------------
1846  */
1847
1848 /**
1849  * The GVN-PRE algorithm.
1850  *
1851  * @param irg   the graph
 * @param env   the pre environment
1852  */
1853 static void gvn_pre(ir_graph *irg, pre_env *env)
1854 {
1855         unsigned              antic_iter;
1856         unsigned              insert_iter;
1857
1858         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1859
1860         /* allocate block info */
1861         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1862
1863         ir_nodehashmap_init(&value_map);
1864
1865         /* generate exp_gen */
1866         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1867         dump_all_expgen_sets(env->list);
1868
1869         /* compute the avail_out sets for all blocks */
1870         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1871
1872         /* compute the anticipated value sets for all blocks */
1873         antic_iter      = 0;
1874         env->first_iter = 1;
1875
1876         /* antic_in passes */
1877         do {
1878                 ++antic_iter;
1879                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1880                 env->changes = 0;
1881                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1882                 env->first_iter = 0;
1883                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1884         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1885
1886         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1887
1888         ir_nodeset_init(env->keeps);
1889         insert_iter       = 0;
1890         env->insert_phase = 1;
1891         env->first_iter   = 1;
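        /* nodes created after this point (by the insertion phase) get larger
           indices; eliminate() uses this to tell partial from full redundancies */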
1892         env->last_idx     = get_irg_last_idx(irg);
1893         /* insertion phase: make partially redundant expressions fully redundant */
1894         do {
1895                 ++insert_iter;
1896                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1897                 env->changes = 0;
1898                 /* TODO topologically top down would be better; fewer iterations. */
1899                 dom_tree_walk_irg(irg, insert_nodes, NULL, env);
1900                 env->first_iter = 0;
1901                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1902         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1903         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1904
1905 #if HOIST_HIGH
1906         dom_tree_walk_irg(irg, hoist_high, NULL, env);
1907 #endif
1908
1909         /* last step: eliminate nodes */
1910         irg_walk_graph(irg, NULL, eliminate, env);
1911         eliminate_nodes(env->pairs, env->keeps);
1912
1913         ir_nodeset_destroy(env->keeps);
1914 }
1915
1916 /**
1917  * GVN-PRE pass for graph irg.
1918  *
1919  * @param irg   the graph
1920  */
1921 void do_gvn_pre(ir_graph *irg)
1922 {
1923         struct obstack        obst;
1924         pre_env               env;
1925         ir_nodeset_t          keeps;
1926         optimization_state_t  state;
1927         block_info           *block_info;
1928
1929         /* Bads and unreachable code cause too much trouble with dominance,
1930            loop info is needed for endless-loop detection, and the absence
1931            of critical edges is a GVN-PRE precondition. */
1934         assure_irg_properties(irg,
1935                 IR_GRAPH_PROPERTY_NO_BADS
1936                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
1937                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
1938                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1939                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1940                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
1941
1942         /* register a debug mask */
1943         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1944
1945         save_optimization_state(&state);
1946         environ = &env;
1947
1948         /* edges would crash if enabled because of our allocate-on-another-obstack trick */
1949         edges_deactivate(irg);
1950         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1951
1952         DEBUG_ONLY(init_stats();)
1953
1954         /* setup environment */
1955         obstack_init(&obst);
1956         env.graph        = irg;
1957         env.obst         = &obst;
1958         env.list         = NULL;
1959         env.start_block  = get_irg_start_block(irg);
1960         env.end_block    = get_irg_end_block(irg);
1961         env.end_node     = get_irg_end(irg);
1962         env.pairs        = NULL;
1963         env.keeps        = &keeps;
1964         env.insert_phase = 0;
1965
1966         /* Detect and set links of infinite loops to non-zero. */
1967         analyse_loops(irg);
1968
1969         /* Switch on GCSE. We need it to correctly compute
1970            the value of a node, which is independent from
1971            its block. */
1972         set_opt_global_cse(1);
1973         /* build our own value table that compares nodes by GVN identities */
1974         if (irg->value_table != NULL)
1975                 del_pset(irg->value_table);
1976         /* assume an initial size of 512 nodes for the pset */
1977         irg->value_table = new_pset(compare_gvn_identities, 512);
1978
1979         /* do GVN-PRE pass */
1980         gvn_pre(irg, &env);
1981         DEBUG_ONLY(print_stats();)
1982
1983         /* clean up: delete all sets */
1984         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
1985                 free_block_info(block_info);
1986         }
1987
1988         DEBUG_ONLY(free_stats();)
1989         ir_nodehashmap_destroy(&value_map);
1990         obstack_free(&obst, NULL);
1991         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1992
1993         /* Pin the graph again.
1994            This is needed due to the use of set_opt_global_cse(1) */
1995         set_irg_pinned(irg, op_pin_state_pinned);
1996         restore_optimization_state(&state);
1997         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
1998
1999         /* TODO there are optimizations that try to use the existing value_table */
2000         new_identities(irg);
2001 }
2002
2003 /* Creates an ir_graph pass for do_gvn_pre. */
2004 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2005 {
2006         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
2007 }
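
/* A minimal usage sketch (assuming an already constructed ir_graph *irg):
 *
 *     do_gvn_pre(irg);                                     // run directly
 *     ir_graph_pass_t *pass = do_gvn_pre_pass("gvn_pre");  // or wrap it as a pass
 *
 * The pass variant only constructs the pass object; scheduling and running it
 * is left to the caller's pass manager.
 */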