gvn_pre: Remove redundant start block test.
[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Global Value Numbering Partial Redundancy Elimination
23  *          (VanDrunen Hosking 2004)
24  * @author  Michael Beck
25  * @brief
26  */
27 #include "config.h"
28
29 #include "debug.h"
30 #include "ircons.h"
31 #include "irdom.h"
32 #include "iredges.h"
33 #include "irflag.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
41 #include "irouts.h"
42 #include "irpass.h"
43 #include "valueset.h"
44 #include "irloop.h"
45 #include "firmstat_t.h"
46
47 #include "irgraph_t.h"
48 #include "irnode_t.h"
49 #include "iropt_t.h"
50 #include "plist.h"
51
52 /* suggested by GVN-PRE authors */
53 #define MAX_ANTIC_ITER 10
54 #define MAX_INSERT_ITER 3
55
56 /* Stops antic iteration from processing endless loops. */
57 #define IGNORE_INF_LOOPS 1
58 /* Stops antic information to flow over infinite loop backedge */
59 #define NO_INF_LOOPS 0
60
61 /* Attempt to reduce register pressure and reduce code size
62    for hoisted nodes. */
63 #define HOIST_HIGH 1
64 #define COMMON_DOM 1
65
66 /* Seamless implementation of handling loads and generally memory
67    dependent nodes with GVN-PRE. */
68 #define LOADS 1
69 #define DIVMODS 0
70
71 /* Experimental */
72
73 /* Only optimize node directly after phi node if node is only successor. */
74 #define MIN_CUT 0
75
76 /* GVN is very limited. This enables optimize_node during value recognition.
77    GVN ist still very limited, since a+1+b+1 doesn't equal a+b+2.
78    TODO Broken for yet unknown reasons. */
79 #define OPTIMIZE_NODES 0
80
81
82 /** Additional info we need for every block. */
83 typedef struct block_info {
84         ir_valueset_t     *exp_gen;    /* contains this blocks clean expressions */
85         ir_valueset_t     *avail_out;  /* available values at block end */
86         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
87         ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert nodes phase */
88         ir_valueset_t     *new_set;    /* new by hoisting made available values */
89         ir_nodehashmap_t  *trans;      /* contains translated nodes translated into block */
90         ir_node           *avail;      /* saves available node for insert node phase */
91         int                found;      /* saves kind of availability for insert_node phase */
92         ir_node           *block;      /* block of the block_info */
93         struct block_info *next;       /* links all instances for easy access */
94 } block_info;
95
96 /**
97  * A pair of nodes to be exchanged.
98  * We have to defer the exchange because there are still needed references
99  * to certain nodes.
100  */
101 typedef struct elim_pair {
102         ir_node *old_node;      /* node that will be replaced */
103         ir_node *new_node;      /* replacement for old_node */
104         struct elim_pair *next; /* links all instances for easy access */
105         int     reason;         /* reason for the replacement */
106 } elim_pair;
107
108 /** environment for the GVN-PRE algorithm */
109 typedef struct pre_env {
110         ir_graph       *graph;        /* current graph */
111         struct obstack  obst;         /* obstack to allocate on */
112         ir_node        *start_block;  /* start block of the current graph */
113         ir_node        *end_block;    /* end block of the current graph */
114         ir_node        *end_node;     /* end node of the current graph */
115         block_info     *list;         /* block_info list head */
116         elim_pair      *pairs;        /* elim_pair list head */
117         ir_nodeset_t   *keeps;        /* a list of to be removed phis to kill their keep alive edges */
118         unsigned        last_idx;     /* last node index of input graph */
119         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
120         char            first_iter;   /* non-zero for first fixed point iteration */
121         int             iteration;    /* iteration counter */
122 #if OPTIMIZE_NODES
123         pset           *value_table;   /* standard value table*/
124         pset           *gvnpre_values; /* gvnpre value table */
125 #endif
126 } pre_env;
127
128 static pre_env *environ;
129
130 /* custom GVN value map */
131 static ir_nodehashmap_t value_map;
132
133 /* debug module handle */
134 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
135
136 #ifdef DEBUG_libfirm
137
138 /* --------------------------------------------------------
139  * Statistics
140  * --------------------------------------------------------
141  */
142
143 typedef struct gvnpre_statistics {
144         int replaced;
145         int partially;
146         int fully;
147         int loads;
148         int divmods;
149         int hoist_high;
150         int first_iter_found;
151         int antic_iterations;
152         int insert_iterations;
153         int infinite_loops;
154 } gvnpre_statistics;
155
156 static gvnpre_statistics *gvnpre_stats = NULL;
157
158 static void init_stats(void)
159 {
160         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
161 }
162
163 static void free_stats(void)
164 {
165         free(gvnpre_stats);
166         gvnpre_stats = NULL;
167 }
168
169 static void print_stats(void)
170 {
171         gvnpre_statistics *stats = gvnpre_stats;
172         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
173         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
174         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
175         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
176         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
177         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
178         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
179         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
180         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
181         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
182 }
183
184 #define set_stats(var, value) (var)=(value)
185 #define inc_stats(var)        ((var)+=1)
186
187 /* --------------------------------------------------------
188  * Dump valuesets
189  * --------------------------------------------------------
190  */
191
192 /**
193  * Dump a value set.
194  *
195  * @param set    the set to dump
196  * @param txt    a text to describe the set
197  * @param block  the owner block of the set
198  */
199 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
200 {
201         ir_valueset_iterator_t iter;
202         ir_node *value, *expr;
203         int i;
204
205         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
206         i = 0;
207         foreach_valueset(set, value, expr, iter) {
208                 if ((i & 3) == 3)
209                         DB((dbg, LEVEL_2, "\n"));
210                 if (value != expr)
211                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
212                 else
213                         DB((dbg, LEVEL_2, " %+F,", expr));
214                 ++i;
215         }
216         DB((dbg, LEVEL_2, "\n}\n"));
217 }
218
219 /**
220  * Dump all exp_gen value sets.
221  *
222  * @param list  the list of block infos to retrieve the sets from
223  */
224 static void dump_all_expgen_sets(block_info *list)
225 {
226         block_info *block_info;
227
228         for (block_info = list; block_info != NULL; block_info = block_info->next) {
229                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
230         }
231 }
232
233 #else
234
235 #define dump_all_expgen_sets(list)
236 #define dump_value_set(set, txt, block)
237
238 #endif /* DEBUG_libfirm */
239
240 /* --------------------------------------------------------
241  * GVN Functions
242  * --------------------------------------------------------
243  */
244
245 /**
246  * Compares node collisions in valuetable.
247  * Modified identities_cmp().
248  */
249 static int compare_gvn_identities(const void *elt, const void *key)
250 {
251         ir_node *a = (ir_node *)elt;
252         ir_node *b = (ir_node *)key;
253         int i, irn_arity_a;
254
255         if (a == b) return 0;
256
257         /* phi nodes kill predecessor values and are always different */
258         if (is_Phi(a) || is_Phi(b))
259                 return 1;
260
261         /* memops are not the same, even if we want to optimize them
262            we have to take the order in account */
263         if (is_memop(a) || is_memop(b) ||
264             get_irn_mode(a) == mode_T ||
265             get_irn_mode(b) == mode_T) {
266                 /* Loads with the same predecessors are the same value;
267                    this should only happen after phi translation. */
268                 if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
269                         return 1;
270         }
271
272         if ((get_irn_op(a) != get_irn_op(b)) ||
273             (get_irn_mode(a) != get_irn_mode(b))) return 1;
274
275         /* compare if a's in and b's in are of equal length */
276         irn_arity_a = get_irn_arity(a);
277         if (irn_arity_a != get_irn_arity(b))
278                 return 1;
279
280         /* blocks are never the same */
281         if (is_Block(a) || is_Block(b))
282                 return 1;
283
284         /* should only be used with gcse enabled */
285         assert(get_opt_global_cse());
286
287         /* compare a->in[0..ins] with b->in[0..ins] */
288         for (i = 0; i < irn_arity_a; ++i) {
289                 ir_node *pred_a = get_irn_n(a, i);
290                 ir_node *pred_b = get_irn_n(b, i);
291                 if (pred_a != pred_b) {
292                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
293                                 return 1;
294                 }
295         }
296
297         /*
298          * here, we already now that the nodes are identical except their
299          * attributes
300          */
301         if (a->op->ops.node_cmp_attr)
302                 return a->op->ops.node_cmp_attr(a, b);
303
304         return 0;
305 }
306
307 /**
308  * Identify does a lookup in the GVN valuetable.
309  * To be used when no new GVN values are to be created.
310  *
311  * @param e  a node representing an expression
312  * @return a node representing the value
313  */
314 static ir_node *identify(ir_node *irn)
315 {
316         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
317         if (value)
318                 return value;
319         /* irn represents a new value, so return the leader */
320         return identify_remember(irn);
321 }
322
323 /**
324  * remember() adds node irn to the GVN valuetable.
325  * Identify_remember only identifies values of nodes with the
326  * same predecessor nodes (not values). By creating a node from the predecessor
327  * values/leaders, a true valuetree is built. Phis kill their predecessor value,
328  * so no circular dependencies need to be resolved.
329  *
330  * TODO Improvement:
331  *      Maybe this could be implemented with a custom node hash that takes
332  *      phi nodes and true values (instead of predecessors) into account,
333  *      resulting in value numbers.
334  * TODO This unnecessarily also handles nodes like calls, which are never equal.
335  *
336  * @param irn  a node representing an expression
337  * @return     the value of the expression
338  */
339 static ir_node *remember(ir_node *irn)
340 {
341         int       arity   = get_irn_arity(irn);
342         int       i;
343         int       changed = 0;
344         ir_node **in      = XMALLOCN(ir_node *, arity);
345         ir_node  *value;
346
347         for (i = 0; i < arity; ++i) {
348                 ir_node *pred       = get_irn_n(irn, i);
349                 /* value and leader at the same time */
350                 ir_node *pred_value = identify(pred);
351
352                 /* phi will be translated anyway, so kill the predecessor values
353                    this also prevents circular dependencies */
354                 if (is_Phi(pred)) {
355                         /* every phi represents its own value */
356                         in[i] = pred;
357                         continue;
358                 }
359
360                 /* predecessor is not its value representation/the leader */
361                 if (pred != pred_value)
362                         changed = 1;
363                 in[i] = pred_value;
364         }
365
366         if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
367                 /* create representative for */
368                 ir_node *nn = new_ir_node(
369                         get_irn_dbg_info(irn),
370                         get_irn_irg(irn),
371                         get_nodes_block(irn),
372                         get_irn_op(irn),
373                         get_irn_mode(irn),
374                         get_irn_arity(irn),
375                         in);
376                 copy_node_attr(environ->graph, irn, nn);
377
378 #if OPTIMIZE_NODES
379                 /* TODO optimize_node() uses the valuetable and thus the custom
380                    identify_cmp() and will fail trying. */
381                 environ->graph->value_table = environ->value_table;
382                 nn = optimize_node(nn);
383                 environ->graph->value_table = environ->gvnpre_values;
384 #endif
385
386                 /* now the value can be determined because the
387                    predecessors are the leaders */
388                 value = identify_remember(nn);
389         } else {
390                 value = identify_remember(irn);
391         }
392         free(in);
393
394         DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
395         ir_nodehashmap_insert(&value_map, irn, value);
396
397         return value;
398 }
399
400 /**
401  * When the value map has been built we may lookup expressions
402  * and remember them if new.
403  */
404 static ir_node *identify_or_remember(ir_node *irn)
405 {
406         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
407         if (value)
408                 return value;
409         else
410                 return remember(irn);
411 }
412
413 /* --------------------------------------------------------
414  * Block info
415  * --------------------------------------------------------
416  */
417
418 /**
419  * Allocate block info for block block.
420  *
421  * @param block   the block
422  * @param env     the environment
423  */
424 static void alloc_block_info(ir_node *block, pre_env *env)
425 {
426         block_info *info = OALLOC(&env->obst, block_info);
427
428         set_irn_link(block, info);
429         info->exp_gen    = ir_valueset_new(16);
430         info->avail_out  = ir_valueset_new(16);
431         info->antic_in   = ir_valueset_new(16);
432         info->antic_done = ir_valueset_new(16);
433         info->trans = XMALLOC(ir_nodehashmap_t);
434         ir_nodehashmap_init(info->trans);
435
436         info->new_set = NULL;
437         info->avail   = NULL;
438         info->block   = block;
439         info->found   = 1;
440
441         info->next = env->list;
442         env->list  = info;
443 }
444
445 static void free_block_info(block_info *block_info)
446 {
447         ir_valueset_del(block_info->exp_gen);
448         ir_valueset_del(block_info->avail_out);
449         ir_valueset_del(block_info->antic_in);
450         if (block_info->trans) {
451                 ir_nodehashmap_destroy(block_info->trans);
452                 free(block_info->trans);
453         }
454         if (block_info->new_set)
455                 ir_valueset_del(block_info->new_set);
456 }
457
458 /**
459  * Bottom up walker that ensures that every block gets a block info.
460  *
461  * @param irn   the node
462  * @param ctx   the environment
463  */
464 static void block_info_walker(ir_node *irn, void *ctx)
465 {
466         if (is_Block(irn)) {
467                 pre_env *env = (pre_env*)ctx;
468                 alloc_block_info(irn, env);
469         }
470 }
471
472 /**
473  * Returns the block info of a block.
474  */
475 static block_info *get_block_info(ir_node *block)
476 {
477         return (block_info*)get_irn_link(block);
478 }
479
480 /* --------------------------------------------------------
481  * Infinite loop analysis
482  * --------------------------------------------------------
483  */
484
485 /**
486  * Walker to set block marks and loop links to 0.
487  */
488 static void clear_block_mark_loop_link(ir_node *block, void *env)
489 {
490         (void) env;
491
492         if (is_Block(block)) {
493                 set_Block_mark(block, 0);
494                 set_loop_link(get_irn_loop(block), NULL);
495         }
496 }
497
498 /**
499  * Returns non-zero if block is part of real loop loop.
500  */
501
502 static unsigned in_loop(ir_node *block, ir_loop *loop)
503 {
504         ir_loop *l     = get_irn_loop(block);
505         ir_loop *outer = get_irg_loop(environ->graph);
506
507         while (l != loop) {
508                 /* loop tree root is not a loop */
509                 if (l == NULL || l == outer)
510                         return 0;
511                 l = get_loop_outer_loop(l);
512         }
513         return 1;
514 }
515
516 /**
517  * Returns the outermost real loop of loop.
518  */
519 static ir_loop *get_loop_outermost(ir_loop *loop)
520 {
521         ir_loop *outer = get_irg_loop(environ->graph);
522         ir_loop *l     = loop;
523         ir_loop *last  = NULL;
524
525         while(l != outer) {
526                 last = l;
527                 l = get_loop_outer_loop(l);
528         }
529         return last;
530 }
531
532 /**
533  * Topologic bottom-up walker sets links of infinite loops to non-zero.
534  * Block marks are used to flag blocks reachable (from end) on the one hand,
535  * on the other hand they are set if the block is not part of an infinite loop.
536  */
537 static void infinite_loop_walker(ir_node *block, void *env)
538 {
539         int arity;
540         int i;
541         (void) env;
542
543         if (! is_Block(block))
544                 return;
545
546         /* start block has no predecessors */
547         if (block == environ->start_block)
548                 return;
549
550         arity = get_irn_arity(block);
551
552         /* block not part of a real loop: no infinite loop */
553         if (get_irn_loop(block) == get_irg_loop(environ->graph))
554                 set_Block_mark(block, 1);
555
556         if (get_Block_mark(block)) {
557                 /* reachable block: mark all cf predecessors */
558                 for (i = 0; i < arity; ++i) {
559                         ir_node *pred = get_Block_cfgpred_block(block, i);
560                         if (is_Bad(pred))
561                                 continue;
562                         set_Block_mark(pred, 1);
563                 }
564         } else {
565                 /* We are in a real loop and see an unreachable block. */
566                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
567
568                 /* flag loop as infinite */
569                 set_loop_link(outermost_loop, outermost_loop);
570                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
571
572                 /* The cf predecessors are unreachable, but can never be part of
573                    an infinite loop, because we just reached them. So we set the
574                    blockmark to prevent triggering the infinite loop detection. */
575
576                 /* passing information to the cf predecessors */
577                 for (i = 0; i < arity; ++i) {
578                         ir_node *pred = get_Block_cfgpred_block(block, i);
579
580                         if (is_Bad(pred))
581                                 continue;
582
583                         /* If our cf predecessor is in the same endless loop,
584                            it is also unreachable. */
585                         if (in_loop(pred, outermost_loop)) {
586                                 set_Block_mark(pred, 0);
587                         } else {
588                                 /* When we leave the unreachable loop, we artificially
589                                    declare the cf predecessor reachable. */
590                                 set_Block_mark(pred, 1);
591                         }
592                 }
593         }
594 }
595
596 /**
597  * Sets loop links of outermost infinite loops to non-zero.
598  */
599 static void analyse_loops(ir_graph *irg)
600 {
601         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
602
603         /* reset block mark and loop links */
604         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
605
606         /* mark end block reachable */
607         set_Block_mark(get_irg_end_block(irg), 1);
608         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
609
610         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
611 }
612
613 #if IGNORE_INF_LOOPS || NO_INF_LOOPS
614 /**
615  * Returns non-zero if block is part of an infinite loop.
616  */
617 static unsigned is_in_infinite_loop(ir_node *block)
618 {
619         ir_loop *loop;
620
621         assert(is_Block(block));
622         loop = get_irn_loop(block);
623         assert(loop);
624
625         loop = get_loop_outermost(loop);
626         if (loop)
627                 return (get_loop_link(loop) != NULL);
628         else
629                 return 0;
630 }
631 #endif
632
633 /* --------------------------------------------------------
634  * GVN-PRE Exp_gen
635  * --------------------------------------------------------
636  */
637
638 /**
639  * Returns non-zero if a node is movable and a possible candidate for PRE.
640  */
641 static unsigned is_nice_value(ir_node *n)
642 {
643         ir_mode *mode = get_irn_mode(n);
644
645         if (is_Phi(n))
646                 return 1;
647
648 #if LOADS || DIVMODS
649         if (is_Proj(n) && mode != mode_X && mode != mode_T)
650                 return 1;
651 #else
652         if (is_Proj(n))
653                 return 0;
654 #endif
655
656 #if LOADS
657         if (is_Load(n))
658                 return get_Load_volatility(n) == volatility_non_volatile;
659         if (is_Store(n))
660                 return get_Store_volatility(n) == volatility_non_volatile;
661 #endif
662
663         if (get_irn_pinned(n) == op_pin_state_pinned)
664                 return 0;
665
666         if (! mode_is_data(mode)) {
667                 if (! is_Div(n) && ! is_Mod(n))
668                         return 0;
669         }
670         return 1;
671 }
672
673 /**
674  * Checks if a node n is clean in block block for exp_gen.
675  *
676  * @param n      the node
677  * @param block  the block
678  * @return non-zero value for clean node
679  */
680 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
681 {
682         int         i, arity;
683
684         if (is_Phi(n))
685                 return 1;
686
687         if (! is_nice_value(n))
688                 return 0;
689
690 #if LOADS
691         /* filter loads with no phi predecessor from antic_in */
692         if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
693                 return 0;
694         if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
695                 return 0;
696 #endif
697
698 #if DIVMODS
699         if (is_Div(n)) {
700                 ir_node *mem = get_Div_mem(n);
701
702                 mem = skip_Pin(mem);
703
704                 if (! is_Phi(mem) && ! is_NoMem(mem))
705                         return 0;
706         }
707
708         if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
709                 return 0;
710 #endif
711
712         arity = get_irn_arity(n);
713         for (i = 0; i < arity; ++i) {
714                 ir_node *pred   = get_irn_n(n, i);
715                 ir_node *value;
716
717                 if (is_Phi(pred))
718                         continue;
719
720                 /* we only handle current block */
721                 if (get_nodes_block(pred) != block)
722                         continue;
723
724                 if (! is_nice_value(pred))
725                         return 0;
726
727                 value = identify(pred);
728                 if (! ir_valueset_lookup(valueset, value))
729                         return 0;
730
731         }
732         return 1;
733 }
734
735 /**
736  * Topological walker puts nodes in top-down topological order into exp_gen set.
737  * Assumed to walk blockwise and nodewise topologically top-down.
738  *
739  * @param irn    the node
740  * @param ctx    the environment
741  */
742 static void topo_walker(ir_node *irn, void *ctx)
743 {
744         ir_node    *block;
745         block_info *info;
746         ir_node    *value;
747         (void) ctx;
748
749         if (is_Block(irn))
750                 return;
751
752 #if OPTIMIZE_NODES
753         environ->graph->value_table = environ->value_table;
754         identify_remember(irn);
755         environ->graph->value_table = environ->gvnpre_values;
756 #endif
757
758         /* GVN step: remember the value. */
759         value = remember(irn);
760
761         if (is_irn_constlike(irn))
762                 return;
763
764         block = get_nodes_block(irn);
765         info  = get_block_info(block);
766
767         if (get_irn_mode(irn) != mode_X)
768                 ir_valueset_insert(info->avail_out, value, irn);
769
770         /* values that are not in antic_in also dont't need to be in any other set */
771
772         if (! is_nice_value(irn))
773                 return;
774
775         if (is_clean_in_block(irn, block, info->exp_gen)) {
776                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
777
778                 ir_valueset_insert(info->exp_gen, value, irn);
779         }
780 }
781
782 /* --------------------------------------------------------
783  * GVN-PRE Antic_in
784  * --------------------------------------------------------
785  */
786
787 /**
788  * Gets result of nodes phi translation into block.
789  *
790  * @param node   the node
791  * @param block  the target block
792  *
793  * @return a phi translation of node node into block block or NULL
794  */
795 static ir_node *get_translated(ir_node *block, ir_node *node)
796 {
797         if (is_irn_constlike(node))
798                 return node;
799
800         return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
801 }
802
803 /**
804  * Saves result of phi translation of node into predecessor
805  * at pos of block succ.
806  *
807  * @param node   the node
808  * @param succ   the successor of the translation target block
809  * @param pos    the position of the predecessor block
810  * @param trans  the translation result
811  *
812  */
813 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
814 {
815         if (is_irn_constlike(node))
816                 return;
817         /* insert or replace */
818         ir_nodehashmap_insert(map, node, trans);
819 }
820
821 /**
822  * Translates an expression above a Phi.
823  *
824  * @param node        the node
825  * @param block       the block the node is translated into
826  * @param pos         the input number of the destination block
827  *
828  * @return a node representing the translated value
829  */
830 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
831 {
832         int       i;
833         int       arity;
834         ir_node **in;
835         ir_node  *pred_block = get_Block_cfgpred_block(block, pos);
836         ir_node  *nn;
837         unsigned  needed;
838
839         if (is_Phi(node)) {
840                 if (get_nodes_block(node) == block)
841                         return get_Phi_pred(node, pos);
842                 /* this phi does not need translation */
843                 return node;
844         }
845         arity = get_irn_arity(node);
846
847         needed = 0;
848         in = ALLOCANZ(ir_node *, arity);
849
850         /* A value has several representatives. The anti leader is chosen to be
851            the main representative. If we access a node as representative of a
852            value we always use the anti leader. The anti leader can be found by
853            antic_in(identify(node)). */
854         for (i = 0; i < arity; ++i) {
855                 ir_node *pred   = get_irn_n(node, i);
856                 ir_node *value  = identify(pred);
857                 /* get leader for pred to lookup its translated value */
858                 ir_node *leader = ir_valueset_lookup(leaderset, value);
859                 ir_node *pred_trans;
860                 ir_node *new_pred;
861
862                 if (! leader)
863                         leader = pred;
864
865                 /* we cannot find this value in antic_in, because the value
866                    has (possibly) changed! */
867                 pred_trans  = get_translated(pred_block, leader);
868
869
870 #if DIVMODS
871                 if (is_Div(node)) {
872                         ir_node *mem = get_Div_mem(node);
873
874                         mem = skip_Pin(mem);
875
876                         if (! is_Phi(mem))
877                                 pred_trans = get_Div_mem(node);
878                 }
879 #endif
880
881                 DB((dbg, LEVEL_3, "trans %+F of %+F is  %+F\n", leader, pred_block, pred_trans));
882                 if (pred_trans == NULL) {
883                         new_pred = pred;
884                 } else {
885                         new_pred = pred_trans;
886
887                         /* loads: Predecessor is a memory phi, which translated yields a proj or
888                            another phi. In case of projection and a load predecessor,
889                            skip them and use the loads memory. */
890                         if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
891 #if LOADS || DIVMODS
892                                 ir_node *loadstore = get_Proj_pred(pred_trans);
893                                 /* If we do not translate this node, we will get its value wrong. */
894                                 needed |= 1;
895
896                                 if (is_Load(loadstore)) {
897                                         /* Put new load under the adjacent loads memory edge
898                                            such that GVN may compare them. */
899                                         new_pred = get_Load_mem(loadstore);
900                                 } else if (is_Store(loadstore)) {
901                                         new_pred = get_Store_mem(loadstore);
902                                 }
903 #endif
904                         } else {
905                                 /* predecessor value changed, so translation is needed */
906                                 if (identify(new_pred) != identify(pred))
907                                         needed |= 1;
908                         }
909                 }
910
911                 DB((dbg, LEVEL_4, "in %+F\n", new_pred));
912                 in[i] = new_pred;
913         }
914
915         if (! needed)
916                 return node;
917
918         DB((dbg, LEVEL_3, "Translate\n"));
919
920         if (is_Proj(node))
921                 pred_block = get_nodes_block(in[0]);
922
923         /* copy node to represent the new value.
924            We do not translate nodes that do not need translation,
925            so we use the newly created nodes as value representatives only.
926            Their block is not important, because we create new ones during
927            the insert node phase. */
928         nn = new_ir_node(
929                 get_irn_dbg_info(node),
930                 environ->graph,
931                 pred_block,
932                 get_irn_op(node),
933                 get_irn_mode(node),
934                 arity,
935                 in);
936         /* We need the attribute copy here, because the Hash value of a
937            node might depend on it. */
938         copy_node_attr(environ->graph, node, nn);
939         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
940            because it already uses availability. */
941
942         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
943         return nn;
944 }
945
946 /**
947  * Block-walker, computes Antic_in(block).
948  * Builds a value tree out of the graph by translating values
949  * over phi nodes.
950  *
951  * @param block  the block
952  * @param ctx    the walker environment
953  */
954 static void compute_antic(ir_node *block, void *ctx)
955 {
956         pre_env                *env       = (pre_env*)ctx;
957         block_info             *succ_info;
958         block_info             *info;
959         ir_node                *succ;
960         ir_node                *value;
961         ir_node                *expr;
962         size_t                  size;
963         ir_valueset_iterator_t  iter;
964         int                     n_succ;
965
966         /* filter blocks from topological walker */
967         if (! is_Block(block))
968                 return;
969
970         /* the end block has no successor */
971         if (block == env->end_block)
972                 return;
973
974         info = get_block_info(block);
975         /* track changes */
976         size = ir_valueset_size(info->antic_in);
977         n_succ = get_Block_n_cfg_outs(block);
978
979         /* add exp_gen */
980         if (env->first_iter) {
981 #if IGNORE_INF_LOOPS
982                 /* keep antic_in of infinite loops empty */
983                 if (! is_in_infinite_loop(block)) {
984                         foreach_valueset(info->exp_gen, value, expr, iter) {
985                                 ir_valueset_insert(info->antic_in, value, expr);
986                         }
987                 }
988 #else
989                 foreach_valueset(info->exp_gen, value, expr, iter) {
990                         ir_valueset_insert(info->antic_in, value, expr);
991                 }
992 #endif
993         }
994
995         /* successor might have phi nodes */
996         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
997                 succ      = get_Block_cfg_out(block, 0);
998                 int pos   = get_Block_cfgpred_pos(succ, block);
999                 succ_info = get_block_info(succ);
1000
1001                 /* initialize translated set */
1002                 if (env->first_iter) {
1003                         info->trans = XMALLOC(ir_nodehashmap_t);
1004                         ir_nodehashmap_init(info->trans);
1005                 }
1006
1007                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1008                         ir_node *trans = get_translated(block, expr);
1009                         ir_node *trans_value;
1010                         ir_node *represent;
1011
1012                         if (trans == NULL)
1013                                 trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
1014
1015                         /* create new value if necessary */
1016                         trans_value = identify_or_remember(trans);
1017
1018                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1019
1020                         /* On value change (phi present) we need the translated node
1021                            to represent the new value for possible further translation. */
1022                         if (value != trans_value)
1023                                 represent = trans;
1024                         else
1025                                 represent = expr;
1026
1027                         if (is_clean_in_block(expr, block, info->antic_in)) {
1028 #if NO_INF_LOOPS
1029                                 /* Prevent information flow over the backedge of endless loops. */
1030                                 if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
1031                                         ir_valueset_replace(info->antic_in, trans_value, represent);
1032                                 }
1033 #else
1034                                 ir_valueset_replace(info->antic_in, trans_value, represent);
1035 #endif
1036                         }
1037                         set_translated(info->trans, expr, represent);
1038                 }
1039
1040         } else if (n_succ > 1) {
1041                 int         i;
1042                 ir_node    *common     = NULL;
1043                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1044                 block_info *succ0_info = get_block_info(succ0);
1045
1046                 /* disjoint of antic_ins */
1047                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1048                         /* iterate over remaining successors */
1049                         for (i = 1; i < n_succ; ++i) {
1050                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1051                                 block_info *succ_info = get_block_info(succ);
1052
1053                                 /* value in antic_in? */
1054                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1055                                 if (common == NULL)
1056                                         break;
1057                         }
1058
1059                         if (common && is_clean_in_block(expr, block, info->antic_in))
1060                                 ir_valueset_replace(info->antic_in, value, expr);
1061                 }
1062         }
1063
1064
1065         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1066
1067         if (size != ir_valueset_size(info->antic_in))
1068                 env->changes |= 1;
1069 }
1070
1071 /* --------------------------------------------------------
1072  * Main algorithm Avail_out
1073  * --------------------------------------------------------
1074  */
1075
1076 /**
1077  * Computes Avail_out(block):
1078  *
1079  * Avail_in(block)  = Avail_out(dom(block))
1080  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1081  *
1082  * Precondition:
1083  *  This function must be called in the top-down topological order:
1084  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1085  *
1086  * @param block   the block
1087  * @param ctx     walker context
1088  */
1089 static void compute_avail_top_down(ir_node *block, void *ctx)
1090 {
1091         pre_env    *env   = (pre_env*)ctx;
1092         block_info *info;
1093
1094         if (block == env->end_block)
1095                 return;
1096
1097         info  = get_block_info(block);
1098
1099         /* Add all nodes from the immediate dominator.
1100            This ensures that avail_out contains the leader. */
1101         if (block != env->start_block) {
1102                 ir_node                *dom_block = get_Block_idom(block);
1103                 block_info             *dom_info  = get_block_info(dom_block);
1104                 ir_node                *value;
1105                 ir_node                *expr;
1106                 ir_valueset_iterator_t  iter;
1107
1108                 foreach_valueset(dom_info->avail_out, value, expr, iter)
1109                         /* replace: use the leader from dominator, not local exp_gen */
1110                         ir_valueset_replace(info->avail_out, value, expr);
1111         }
1112
1113         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1114 }
1115
1116 /* --------------------------------------------------------
1117  * Main algorithm redundancy detection
1118  * --------------------------------------------------------
1119  */
1120
1121 /**
1122  * Returns a valid mode if the value of expr is a partially redundant value.
1123  *
1124  * @param block   the block
1125  * @param expr    the expression
1126  *
1127  * @return mode of the expression if it is partially redundant else NULL
1128  */
1129 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1130 {
1131         ir_node *first_avail         = NULL;
1132         int      pos;
1133         int      arity               = get_irn_arity(block);
1134         int      fully_redundant     = 1;
1135         int      partially_redundant = 0;
1136         ir_mode *mode                = NULL;
1137
1138         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1139
1140         /* for each predecessor blocks */
1141         for (pos = 0; pos < arity; ++pos) {
1142                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1143                 block_info *pred_info;
1144                 ir_node    *trans_expr;
1145                 ir_node    *trans_value;
1146                 ir_node    *avail_expr;
1147
1148                 pred_info  = get_block_info(pred_block);
1149                 trans_expr = get_translated(pred_block, expr);
1150                 trans_value = identify(trans_expr);
1151
1152                 if (is_Const(trans_expr))
1153                         avail_expr = trans_expr;
1154                 else
1155                         avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1156
1157                 /* value might be available through a not yet existing constant */
1158                 if (avail_expr == NULL && is_Const(trans_expr)) {
1159                         /* limit range of new constants */
1160                         ir_mode   *cmode = get_irn_mode(trans_expr);
1161                         ir_tarval *upper = new_tarval_from_long(127, cmode);
1162                         ir_tarval *lower = new_tarval_from_long(-127, cmode);
1163                         ir_tarval *c     = get_Const_tarval(trans_expr);
1164
1165                         /* tarval within range? */
1166                         if (tarval_cmp(lower, c) == ir_relation_less_equal &&
1167                                 tarval_cmp(c, upper) == ir_relation_less_equal) {
1168                                 avail_expr = trans_expr;
1169                         } else {
1170                                 avail_expr = NULL;
1171                         }
1172             }
1173
1174                 DB((dbg, LEVEL_3, "avail_expr %+F  trans_expr %+F\n", avail_expr, trans_expr));
1175
1176                 if (avail_expr == NULL) {
1177                         pred_info->avail = trans_expr;
1178                         pred_info->found = 0;
1179                         fully_redundant  = 0;
1180                 } else {
1181                         /* expr is available, use the leader */
1182                         pred_info->avail    = avail_expr;
1183                         pred_info->found    = 1;
1184                         mode                = get_irn_mode(avail_expr);
1185                         partially_redundant = 1;
1186
1187                         if (first_avail == NULL)
1188                                 first_avail = avail_expr;
1189                         else if (first_avail != avail_expr)
1190                                 /* Multiple different expressions are available,
1191                                    This is why we need no cut over avail_out sets. */
1192                                 fully_redundant = 0;
1193
1194                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1195                 }
1196         }
1197
1198         /* If it is not the same value already existing along every predecessor
1199        and it is defined by some predecessor then it is partially redundant. */
1200         if (! partially_redundant || fully_redundant)
1201                 return NULL;
1202         return mode;
1203 }
1204
1205 /**
1206  * Updates the new_set of a block by adding the new_set of
1207  * the immediate dominating block.
1208  *
1209  * @param  the block
1210  */
1211 static void update_new_set(ir_node *block, ir_node *idom)
1212 {
1213         ir_node                *value;
1214         ir_node                *expr;
1215         ir_valueset_iterator_t  iter;
1216         block_info             *curr_info = get_block_info(block);
1217         block_info             *idom_info = get_block_info(idom);
1218         int                     updated   = 0;
1219
1220         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1221         foreach_valueset(idom_info->new_set, value, expr, iter) {
1222                 /* inherit new_set from immediate dominator */
1223                 ir_valueset_insert(curr_info->new_set, value, expr);
1224                 /* replace in avail_out */
1225                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1226         }
1227 #ifdef DEBUG_libfirm
1228         if (updated)
1229                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1230 #endif
1231 }
1232
1233 /**
1234  * Checks if hoisting irn is greedy.
1235  * Greedy hoisting means that there are non partially redundant nodes
1236  * hoisted. This happens if a partially redundant node has
1237  * non redundant predecessors.
1238  */
1239 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1240 {
1241         int block_arity = get_irn_arity(block);
1242         int arity = get_irn_arity(irn);
1243         int pos, i;
1244         block_info *info = get_block_info(block);
1245
1246         /* As long as the predecessor values are available in all predecessor blocks,
1247            we can hoist this value. */
1248         for (pos = 0; pos < block_arity; ++pos) {
1249                 ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1250                 block_info *pred_info  = get_block_info(pred_block);
1251
1252                 for (i = 0; i < arity; ++i) {
1253                         ir_node *pred     = get_irn_n(irn, i);
1254                         ir_node *value;
1255                         ir_node *leader;
1256                         ir_node *trans;
1257                         ir_node *trans_val;
1258                         ir_node *avail;
1259
1260 #if MIN_CUT
1261                         /* Very conservative min cut. Phi might only have 1 user. */
1262                         if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
1263                                 return 1;
1264 #endif
1265
1266                         if (is_Phi(pred) && get_nodes_block(pred) == block)
1267                                 continue;
1268
1269                         DB((dbg, LEVEL_3, "pred %+F\n", pred));
1270                         value = identify(pred);
1271                         leader = ir_valueset_lookup(info->antic_in, value);
1272                         if (! leader)
1273                                 leader = pred;
1274                         DB((dbg, LEVEL_3, "lead %+F\n", leader));
1275                         trans   = get_translated(pred_block, leader);
1276                         if (! trans)
1277                                 trans = pred;
1278                         DB((dbg, LEVEL_3, "trans %+F\n", trans));
1279
1280                         trans_val = identify(trans);
1281                         DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1282
1283                         if (is_Const(trans_val) || is_SymConst(trans_val)) {
1284                                 /* existing constant */
1285                                 if (get_irn_idx(trans_val) < environ->last_idx) {
1286                                         continue;
1287                                 } else {
1288                                         /* limit range of new constants */
1289                                         ir_mode   *cmode = get_irn_mode(trans);
1290                                         ir_tarval *upper = new_tarval_from_long(128, cmode);
1291                                         ir_tarval *lower = new_tarval_from_long(-128, cmode);
1292                                         ir_tarval *c     = get_Const_tarval(trans);
1293
1294                                         /* tarval within range? */
1295                                         if (tarval_cmp(lower, c) == ir_relation_less &&
1296                                                 tarval_cmp(c, upper) == ir_relation_less) {
1297                                                 continue;
1298                                         } else {
1299                                                 return 1;
1300                                         }
1301                                 }
1302                         }
1303
1304                         /* */
1305                         if (is_irn_constlike(trans_val))
1306                                 continue;
1307
1308                         avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1309
1310                         DB((dbg, LEVEL_3, "avail %+F\n", avail));
1311                         if (! avail)
1312                                 return 1;
1313 #if MIN_CUT
1314                         /* only optimize if predecessors have been optimized */
1315                         if (ir_valueset_lookup(info->antic_done, value) == NULL)
1316                                 return 1;
1317 #endif
1318                 }
1319         }
1320         return 0;
1321 }
1322
1323 /**
1324  * Perform insertion of partially redundant values.
1325  * For every block node, do the following:
1326  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1327  * If the block has multiple predecessors,
1328  *     2a. Iterate over the ANTIC expressions for the block to see if
1329  *         any of them are partially redundant.
1330  *     2b. If so, insert them into the necessary predecessors to make
1331  *         the expression fully redundant.
1332  *     2c. Insert a new Phi merging the values of the predecessors.
1333  *     2d. Insert the new Phi, and the new expressions, into the
1334  *         NEW_SETS set.
1335  *
1336  * @param block  the block
1337  * @param ctx    the walker environment
1338  */
1339 static void insert_nodes_walker(ir_node *block, void *ctx)
1340 {
1341         pre_env                *env    = (pre_env*)ctx;
1342         int                     arity  = get_irn_arity(block);
1343         ir_node                *value;
1344         ir_node                *expr;
1345         block_info             *info;
1346         ir_node                *idom;
1347         int                     pos;
1348         ir_valueset_iterator_t  iter;
1349
1350         /* only blocks */
1351         if (! is_Block(block))
1352                 return;
1353
1354         /* ensure that even the start block has a new_set */
1355         info = get_block_info(block);
1356         if (info->new_set)
1357                 ir_valueset_del(info->new_set);
1358         info->new_set = ir_valueset_new(16);
1359
1360         if (block == env->start_block)
1361                 return;
1362
1363         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1364
1365         idom = get_Block_idom(block);
1366         update_new_set(block, idom);
1367
1368         /* process only path joining blocks */
1369         if (arity < 2) {
1370                 return;
1371         }
1372
1373         /* This is the main reason antic_in is preverred over antic_out;
1374            we may iterate over every anticipated value first and not
1375            over the predecessor blocks. */
1376         foreach_valueset(info->antic_in, value, expr, iter) {
1377                 ir_mode  *mode;
1378                 ir_node  *phi;
1379                 ir_node **phi_in;
1380
1381                 /* already done? */
1382                 if (ir_valueset_lookup(info->antic_done, value))
1383                         continue;
1384
1385                 /* filter phi nodes from antic_in */
1386                 if (is_Phi(expr))
1387                         continue;
1388
1389                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1390
1391                 /* A value computed in the dominator is totally redundant.
1392                    Hence we have nothing to insert. */
1393                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1394                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1395                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1396
1397                         ir_valueset_insert(info->antic_done, value, expr);
1398                         continue;
1399                 }
1400
1401                 if (is_hoisting_greedy(expr, block)) {
1402                         DB((dbg, LEVEL_2, "greedy\n"));
1403                         continue;
1404                 }
1405
1406                 mode = is_partially_redundant(block, expr, value);
1407                 if (mode == NULL)
1408                         continue;
1409
1410 #if LOADS || DIVMODS
1411                 /* save old mode_M phis to remove keepalive edges later */
1412                 if (is_memop(expr)) {
1413                         ir_node *mem = get_memop_mem(expr);
1414                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1415                                 ir_nodeset_insert(env->keeps, mem);
1416                         }
1417                 }
1418 #endif
1419
1420 #ifdef DEBUG_libfirm
1421                 if (! is_Proj(expr)) {
1422                         if (env->first_iter)
1423                                 inc_stats(gvnpre_stats->first_iter_found);
1424                         inc_stats(gvnpre_stats->partially);
1425                 }
1426                 if (is_Load(expr) || is_Store(expr))
1427                         inc_stats(gvnpre_stats->loads);
1428                 else if (is_Div(expr) || is_Mod(expr))
1429                         inc_stats(gvnpre_stats->divmods);
1430 #endif
1431
1432                 phi_in = XMALLOCN(ir_node *, arity);
1433
1434                 /* for each predecessor block */
1435                 for (pos = 0; pos < arity; ++pos) {
1436                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1437                         block_info *pred_info;
1438
1439                         pred_info = get_block_info(pred_block);
1440
1441                         if (! pred_info->found) {
1442                                 int i;
1443                                 int node_arity = get_irn_arity(expr);
1444                                 ir_node **in = XMALLOCNZ(ir_node *, node_arity);
1445                                 ir_node *trans;
1446                                 ir_node *new_value, *new_value2;
1447                                 ir_node *target_block = pred_block;
1448
1449                                 for (i = 0; i < node_arity; ++i) {
1450                                         ir_node *pred     = get_irn_n(expr, i);
1451                                         ir_node *value    = identify(pred);
1452                                         ir_node *leader;
1453                                         ir_node *trans;
1454                                         ir_node *trans_val;
1455                                         ir_node *avail;
1456
1457                                         /* transform knowledge over the predecessor from
1458                                            anti-leader world into leader world. */
1459
1460                                         DB((dbg, LEVEL_3, "pred %+F\n", pred));
1461                                         value = identify(pred);
1462
1463                                         /* get leader for pred to lookup its translated value */
1464                                         leader = ir_valueset_lookup(info->antic_in, value);
1465                                         if (! leader)
1466                                                 leader = pred;
1467                                         DB((dbg, LEVEL_3, "lead %+F\n", leader));
1468
1469                                         trans   = get_translated(pred_block, leader);
1470                                         if (!trans)
1471                                                 trans = pred;
1472                                         DB((dbg, LEVEL_3, "trans %+F\n", trans));
1473
1474                                         /* in case of phi, we are done */
1475                                         if (is_Phi(pred) && get_nodes_block(pred) == block) {
1476                                                 in[i] = trans;
1477                                                 continue;
1478                                         }
1479
1480                                         trans_val = identify(trans);
1481                                         DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1482
1483                                         /* constants are always available but not in avail set */
1484                                         if (is_irn_constlike(trans_val)) {
1485                                                 in[i] = trans;
1486                                                 continue;
1487                                         }
1488
1489                                         /* use the leader
1490                                            In case of loads we need to make sure the hoisted
1491                                            loads are found despite their unique value. */
1492                                         avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1493                                         DB((dbg, LEVEL_3, "avail %+F\n", avail));
1494
1495                                         assert(avail && "predecessor has to be available");
1496                                         in[i] = avail;
1497                                 }
1498
1499                                 if (is_Proj(expr))
1500                                         target_block = get_nodes_block(in[0]);
1501
1502                                 /* Copy node to represent the new value.
1503                                    We use translated nodes as value representatives only.
1504                                    They have anti leaders as predecessors, not leaders!
1505                                    So we have to create a new node using leaders. */
1506                                 trans = new_ir_node(
1507                                         get_irn_dbg_info(expr),
1508                                         environ->graph,
1509                                         target_block,
1510                                         get_irn_op(expr),
1511                                         get_irn_mode(expr),
1512                                         get_irn_arity(expr),
1513                                         in);
1514                                 free(in);
1515                                 /* We need the attribute copy here, because the Hash value of a
1516                                    node might depend on it. */
1517                                 copy_node_attr(environ->graph, expr, trans);
1518
1519                                 /* value is now available in target block through trans
1520                                    insert (not replace) because it has not been available */
1521                                 new_value = identify_or_remember(trans);
1522                                 ir_valueset_insert(pred_info->avail_out, new_value, trans);
1523                                 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
1524
1525                                 new_value2 = identify(get_translated(pred_block, expr));
1526                                 ir_valueset_insert(pred_info->avail_out, new_value2, trans);
1527                                 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
1528
1529                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
1530
1531                                 phi_in[pos] = trans;
1532                         } else {
1533                                 /* value available */
1534                                 phi_in[pos] = pred_info->avail;
1535                         }
1536                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1537                 }
1538
1539                 /* We do not connect tuples as they will be connected automatically
1540                    by the corresponding projections. */
1541                 if (get_irn_mode(expr) != mode_T) {
1542
1543                         phi = new_r_Phi(block, arity, phi_in, mode);
1544                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1545
1546                         /* This value is now available through the new phi.
1547                            insert || replace in avail_out */
1548                         ir_valueset_replace(info->avail_out, value, phi);
1549                         ir_valueset_insert(info->new_set, value, phi);
1550                 }
1551                 free(phi_in);
1552
1553                 /* already optimized this value in this block */
1554                 ir_valueset_insert(info->antic_done, value, expr);
1555                 env->changes |= 1;
1556         }
1557 }
1558
1559 #if HOIST_HIGH
1560 static void update_new_set_walker(ir_node *block, void *ctx)
1561 {
1562         pre_env *env = (pre_env*)ctx;
1563
1564         if (! is_Block(block))
1565                 return;
1566         if (block == env->start_block)
1567                 return;
1568
1569         update_new_set(block, get_Block_idom(block));
1570 }
1571
1572 /**
1573  * Domtree block walker to insert nodes with dying operands
1574  * into the highest possible block whilst still being anticipated.
1575  */
1576 static void hoist_high(ir_node *block, void *ctx)
1577 {
1578         (void)ctx;
1579
1580         block_info             *curr_info;
1581         ir_valueset_iterator_t  iter;
1582         ir_node                *expr;
1583         ir_node                *value;
1584         int                     arity      = get_irn_arity(block);
1585
1586         if (! is_Block(block))
1587                 return;
1588
1589         curr_info = get_block_info(block);
1590
1591         if (curr_info->new_set)
1592                 ir_valueset_del(curr_info->new_set);
1593         curr_info->new_set = ir_valueset_new(16);
1594
1595         if (arity < 2)
1596                 return;
1597
1598         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1599
1600         /* foreach entry optimized by insert node phase */
1601         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1602                 int pos;
1603
1604                 /* TODO currently we cannot handle load and their projections */
1605                 if (is_memop(expr) || is_Proj(expr))
1606                         continue;
1607
1608                 DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
1609
1610                 /* visit hoisted expressions */
1611                 for (pos = 0; pos < arity; ++pos) {
1612                         /* standard target is predecessor block */
1613                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1614                         block_info *pred_info  = get_block_info(target);
1615                         ir_node    *avail;
1616                         ir_node    *new_target;
1617                         ir_node    *trans_expr;
1618                         ir_node    *trans_value;
1619                         ir_node    *dom;
1620                         int         avail_arity;
1621                         int         i;
1622                         unsigned    nest_depth;
1623                         block_info *dom_info;
1624
1625                         /* get phi translated value */
1626                         trans_expr  = get_translated(target, expr);
1627                         trans_value = identify(trans_expr);
1628                         avail       = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1629
1630                         /* get the used expr on this path */
1631
1632                         /* TODO when does this happen? */
1633                         if (avail == NULL)
1634                                 continue;
1635
1636                         avail_arity = get_irn_arity(avail);
1637                         value = identify(avail);
1638
1639                         /* anticipation border */
1640                         new_target  = NULL;
1641                         nest_depth  = get_loop_depth(get_irn_loop(target));
1642
1643                         /* Either push the hoisted nodes up their path,
1644                            or try to put them directly into their common dominator. */
1645 #if COMMON_DOM
1646                         /* By using block (instead of target) as initial block,
1647                            we only allow hoisting into a common block of
1648                            both predecessor blocks. */
1649                         dom         = block;
1650 #else
1651                         dom         = target;
1652 #endif
1653
1654                         while (dom && dom != get_Block_idom(block)) {
1655
1656                                 dom = get_Block_idom(dom);
1657                                 dom_info = get_block_info(dom);
1658                                 DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
1659
1660                                 /* TODO Being in antic_in means hoistable above block,
1661                                    but we need 'hoistable into block'.
1662                                    This could be achieved by a flag for each valueset pair,
1663                                    being set during antic computation. */
1664
1665                                 /* check if available node ist still anticipated and clean */
1666                                 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1667                                         DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
1668                                         break;
1669                                 }
1670
1671                                 nest_depth = get_loop_depth(get_irn_loop(dom));
1672
1673                                 /* do not hoist into loops */
1674                                 if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
1675                                         DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
1676                                         /* not a suitable location */
1677                                         continue;
1678                                 }
1679
1680                                 /* check if operands die */
1681
1682                                 /* check for uses on current path */
1683                                 for (i = 0; i < avail_arity; i++) {
1684                                         ir_node   *pred       = get_irn_n(avail, i);
1685                                         ir_node   *pred_value = identify(pred);
1686
1687                                         if (dom == NULL)
1688                                                 break;
1689
1690                                         DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
1691
1692                                         if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1693                                                 DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
1694                                                 dom = NULL;
1695                                                 break;
1696                                         }
1697
1698                                         /* check every successor */
1699                                         foreach_out_edge(pred, edge) {
1700                                                 ir_node *succ = get_edge_src_irn(edge);
1701                                                 DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
1702
1703                                                 /* check only successors on current path to end */
1704                                                 if (block_dominates(dom, get_nodes_block(succ))) {
1705                                                         ir_node *succ_value = identify(succ);
1706
1707                                                         /* Do we have another user than avail?
1708                                                            Then predecessor is not dead after removal of avail. */
1709                                                         if (succ_value != value) {
1710                                                                 DB((dbg, LEVEL_4, "still used in %+F\n", succ));
1711                                                                 dom = NULL;
1712                                                                 break;
1713                                                         }
1714                                                 }
1715                                         }
1716                                 }
1717                                 if (dom)
1718                                         new_target = dom;
1719
1720 #if COMMON_DOM
1721                                 /* only try common dominator */
1722                                 break;
1723 #endif
1724                         }
1725
1726                         /* put node into new target block */
1727                         if (new_target) {
1728                                 block_info *target_info = get_block_info(new_target);
1729                                 int         nn_arity    = get_irn_arity(avail);
1730                                 ir_node   **in          = XMALLOCN(ir_node *, nn_arity);
1731                                 ir_node    *nn;
1732                                 int         i;
1733
1734                                 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1735                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1736
1737                                 for (i = 0; i < nn_arity; ++i) {
1738                                         ir_node *pred       = get_irn_n(avail, i);
1739                                         ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
1740                                         assert(avail_pred);
1741                                         in[i] = avail_pred;
1742                                 }
1743                                 nn = new_ir_node(
1744                                         get_irn_dbg_info(avail),
1745                                         environ->graph,
1746                                         new_target,
1747                                         get_irn_op(avail),
1748                                         get_irn_mode(avail),
1749                                         nn_arity,
1750                                         in);
1751                                 free(in);
1752
1753                                 identify_or_remember(nn);
1754                                 /* TODO Nodes are inserted into a dominating block and should
1755                                    be available from this point on. Currently we do not push
1756                                    the availability information through during the walk. */
1757                                 ir_valueset_insert(target_info->new_set, value, nn);
1758                                 ir_valueset_insert(target_info->avail_out, value, nn);
1759                         }
1760                 }
1761         }
1762 }
1763 #endif
1764
1765 /* --------------------------------------------------------
1766  * Elimination of fully redundant nodes
1767  * --------------------------------------------------------
1768  */
1769
1770 /**
1771  * Walker which finds redundant nodes using avail_out sets
1772  * and exchanges them for existing ones.
1773  * We cannot change the graph here as this would affect
1774  * the hash values of the nodes.
1775  *
1776  * @param irn  the node
1777  * @param ctx  the walker environment
1778  */
1779 static void eliminate(ir_node *irn, void *ctx)
1780 {
1781         pre_env *env = (pre_env*)ctx;
1782
1783         if (! is_Block(irn)) {
1784                 ir_node    *block = get_nodes_block(irn);
1785                 block_info *info  = get_block_info(block);
1786                 ir_node    *value = identify(irn);
1787
1788                 if (value != NULL) {
1789                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1790                         DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
1791
1792                         if (expr != NULL && expr != irn) {
1793                                 elim_pair *p = OALLOC(&env->obst, elim_pair);
1794
1795                                 p->old_node = irn;
1796                                 p->new_node = expr;
1797                                 p->next     = env->pairs;
1798                                 if (get_irn_idx(expr) > env->last_idx)
1799                                         p->reason = FS_OPT_GVN_PARTLY;
1800                                 else
1801                                         p->reason = FS_OPT_GVN_FULLY;
1802                                 env->pairs = p;
1803                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1804                         }
1805                 }
1806         }
1807 }
1808
1809 /**
1810  * Do all the recorded changes and optimize
1811  * newly created Phi's.
1812  *
1813  * @param pairs  list of elimination pairs
1814  */
1815 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1816 {
1817         elim_pair             *p;
1818         ir_node               *end    = environ->end_node;
1819
1820         for (p = pairs; p != NULL; p = p->next) {
1821                 /* might be already changed */
1822                 p->new_node = skip_Id(p->new_node);
1823
1824                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1825
1826                 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1827                  * which we can optimize here */
1828                 if (is_Phi(p->new_node)) {
1829                         int      i;
1830                         ir_node *res = NULL;
1831
1832                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1833                                 ir_node *pred = get_irn_n(p->new_node, i);
1834
1835                                 if (pred != p->old_node) {
1836                                         if (res) {
1837                                                 res = NULL;
1838                                                 break;
1839                                         }
1840                                         res = pred;
1841                                 }
1842                         }
1843                         if (res) {
1844                                 exchange(p->new_node, res);
1845                                 p->new_node = res;
1846                         }
1847                 }
1848                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1849
1850                 exchange(p->old_node, p->new_node);
1851         }
1852
1853         /* remove keep alive edges of unused mode_M phis */
1854         foreach_ir_nodeset(keeps, m_phi, iter) {
1855                 remove_End_keepalive(end, m_phi);
1856         }
1857 }
1858
1859
1860 /* --------------------------------------------------------
1861  * GVN PRE pass
1862  * --------------------------------------------------------
1863  */
1864
1865 /**
1866  * Gvn_Pre algorithm.
1867  *
1868  * @param irg   the graph
1869  * @param env   the environment
1870  */
1871 static void gvn_pre(ir_graph *irg, pre_env *env)
1872 {
1873         unsigned              antic_iter;
1874         unsigned              insert_iter;
1875
1876         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1877
1878         /* allocate block info */
1879         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1880
1881         ir_nodehashmap_init(&value_map);
1882
1883         /* generate exp_gen */
1884         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1885         dump_all_expgen_sets(env->list);
1886
1887         /* compute the avail_out sets for all blocks */
1888         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1889
1890         /* compute the anticipated value sets for all blocks */
1891         antic_iter      = 0;
1892         env->first_iter = 1;
1893
1894         env->iteration = 1;
1895         /* antic_in passes */
1896         do {
1897                 ++antic_iter;
1898                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1899                 env->changes = 0;
1900                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1901                 env->first_iter = 0;
1902                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1903                 env->iteration ++;
1904         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1905
1906         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1907
1908         ir_nodeset_init(env->keeps);
1909         insert_iter       = 0;
1910         env->first_iter   = 1;
1911         /* compute redundant expressions */
1912         do {
1913                 ++insert_iter;
1914                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1915                 env->changes = 0;
1916                 /* TODO topologically top down would be better; fewer iterations. */
1917                 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
1918                 env->first_iter = 0;
1919                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1920         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1921         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1922
1923 #if HOIST_HIGH
1924         /* An attempt to reduce lifetimes by hoisting already hoisted values
1925            even higher if their operands die. */
1926         dom_tree_walk_irg(irg, hoist_high, NULL, NULL);
1927         /* update avail_out for elimination */
1928         dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
1929 #endif
1930
1931         /* Deactivate edges to prevent intelligent removal of nodes,
1932            or else we will get deleted nodes which we try to exchange. */
1933         edges_deactivate(environ->graph);
1934
1935         /* eliminate nodes */
1936         irg_walk_graph(irg, NULL, eliminate, env);
1937         eliminate_nodes(env->pairs, env->keeps);
1938
1939         ir_nodeset_destroy(env->keeps);
1940 }
1941
1942 /**
1943  * Gvn_Pre pass for graph irg.
1944  *
1945  * @param irg   the graph
1946  */
1947 void do_gvn_pre(ir_graph *irg)
1948 {
1949         pre_env               env;
1950         ir_nodeset_t          keeps;
1951         optimization_state_t  state;
1952         block_info           *block_info;
1953
1954         /* bads and unreachables cause too much trouble with dominance,
1955            loop info for endless loop detection,
1956            no critical edges is PRE precondition
1957          */
1958         assure_irg_properties(irg,
1959                 IR_GRAPH_PROPERTY_NO_BADS
1960                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
1961                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
1962                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1963                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1964                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
1965
1966         /* register a debug mask */
1967         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1968
1969         save_optimization_state(&state);
1970         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1971
1972         edges_activate(irg);
1973
1974         environ = &env;
1975         DEBUG_ONLY(init_stats();)
1976
1977         /* setup environment */
1978         env.graph        = irg;
1979         env.list         = NULL;
1980         env.start_block  = get_irg_start_block(irg);
1981         env.end_block    = get_irg_end_block(irg);
1982         env.end_node     = get_irg_end(irg);
1983         env.pairs        = NULL;
1984         env.keeps        = &keeps;
1985         env.last_idx     = get_irg_last_idx(irg);
1986         obstack_init(&env.obst);
1987
1988         /* Detect and set links of infinite loops to non-zero. */
1989         analyse_loops(irg);
1990
1991 #if OPTIMIZE_NODES
1992         new_identities(irg);
1993         env.value_table = irg->value_table;
1994         irg->value_table = NULL;
1995 #endif
1996
1997         /* Switch on GCSE. We need it to correctly compute
1998            the value of a node, which is independent from
1999            its block. */
2000         set_opt_global_cse(1);
2001         /* new_identities() */
2002         if (irg->value_table != NULL)
2003                 del_pset(irg->value_table);
2004         /* initially assumed nodes in pset are 512 */
2005         irg->value_table = new_pset(compare_gvn_identities, 512);
2006 #if OPTIMIZE_NODES
2007         env.gvnpre_values = irg->value_table;
2008 #endif
2009
2010         /* do GVN-PRE pass */
2011         gvn_pre(irg, &env);
2012         DEBUG_ONLY(print_stats();)
2013
2014         /* clean up: delete all sets */
2015         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2016                 free_block_info(block_info);
2017         }
2018
2019         DEBUG_ONLY(free_stats();)
2020         ir_nodehashmap_destroy(&value_map);
2021         obstack_free(&env.obst, NULL);
2022         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2023
2024         /* Pin the graph again.
2025            This is needed due to the use of set_opt_global_cse(1) */
2026         set_irg_pinned(irg, op_pin_state_pinned);
2027         restore_optimization_state(&state);
2028         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
2029
2030 #if OPTIMIZE_NODES
2031         irg->value_table = env.value_table;
2032         del_pset(irg->value_table);
2033         irg->value_table = env.gvnpre_values;
2034 #endif
2035
2036         /* TODO There seem to be optimizations that try to use the existing
2037            value_table. */
2038         new_identities(irg);
2039
2040         /* TODO assure nothing else breaks. */
2041         set_opt_global_cse(0);
2042         edges_activate(irg);
2043 }
2044
2045 /* Creates an ir_graph pass for do_gvn_pre. */
2046 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2047 {
2048         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
2049 }