Fixed high hoisting, additional comments, correct debug levels
[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Global Value Numbering Partial Redundancy Elimination
23  *          (VanDrunen and Hosking 2004)
24  * @author  Michael Beck
25  *
26  */
27 #include "config.h"
28
29 #include "debug.h"
30 #include "ircons.h"
31 #include "irdom.h"
32 #include "iredges.h"
33 #include "irflag.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
41 #include "irouts.h"
42 #include "irpass.h"
43 #include "valueset.h"
44 #include "irloop.h"
45
46 #include "irgraph_t.h"
47 #include "irnode_t.h"
48 #include "iropt_t.h"
49 #include "plist.h"
50
51 #define MAX_ANTIC_ITER 10
52 #define MAX_INSERT_ITER 3
53
54 #define PARTLY_ONLY 0
55 #define HOIST_HIGH 1
56 #define BETTER_GREED 0
57 #define LOADS 0
58 #define DIVMODS 1
59 #define NO_INF_LOOPS 0
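/* Tuning switches used in this pass: LOADS and DIVMODS enable the special
   phi translation of memory operations (Loads resp. Div/Mod), NO_INF_LOOPS
   keeps the antic_in sets of infinite loops empty and BETTER_GREED selects
   the (unfinished) alternative greediness heuristic.  PARTLY_ONLY and
   HOIST_HIGH presumably gate optional parts of the insertion phase
   (cf. the hoist_high statistic). */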
60
61 /** Additional info we need for every block. */
62 typedef struct block_info {
63         ir_valueset_t     *exp_gen;    /* contains this block's clean expressions */
64         ir_valueset_t     *avail_out;  /* available values at block end */
65         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
66         ir_valueset_t     *antic_done; /* keeps elements of antic_in after the insert_nodes phase */
67         ir_valueset_t     *new_set;    /* values made available by hoisting */
68         ir_nodehashmap_t  *trans;      /* contains nodes translated into this block */
69         ir_node           *avail;      /* saves available node for insert node phase */
70         int                found;      /* saves kind of availability for insert_node phase */
71         ir_node           *block;      /* block of the block_info */
72         struct block_info *next;       /* links all instances for easy access */
73 } block_info;
74
75 /**
76  * A pair of nodes to be exchanged.
77  * We have to defer the exchange because our hash-sets cannot
78  * find an already replaced node.
79  */
80 typedef struct elim_pair {
81         ir_node *old_node;      /* node that will be replaced */
82         ir_node *new_node;      /* replacement for old_node */
83         struct elim_pair *next; /* links all instances for easy access */
84         int     reason;         /* reason for the replacement */
85 } elim_pair;
86
87 /** environment for the GVN-PRE algorithm */
88 typedef struct pre_env {
89         ir_graph       *graph;        /* current graph */
90         struct obstack *obst;         /* obstack to allocate on */
91         ir_node        *start_block;  /* start block of the current graph */
92         ir_node        *end_block;    /* end block of the current graph */
93         ir_node        *end_node;     /* end node of the current graph */
94         block_info     *list;         /* block_info list head */
95         elim_pair      *pairs;        /* elim_pair list head */
96         ir_nodeset_t   *keeps;        /* set of Phis to be removed, to kill their keep-alive edges */
97         unsigned        last_idx;     /* last node index of input graph */
98         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
99         char            first_iter;   /* non-zero for first fixed point iteration */
100 } pre_env;
101
102 static pre_env *environ;
103 /* custom GVN value map */
104 static ir_nodehashmap_t value_map;
105
106 /* debug module handle */
107 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
108
109 #ifdef DEBUG_libfirm
110
111 /* --------------------------------------------------------
112  * Statistics
113  * --------------------------------------------------------
114  */
115
116 typedef struct gvnpre_statistics {
117         int replaced;
118         int partially;
119         int fully;
120         int loads;
121         int divmods;
122         int hoist_high;
123         int first_iter_found;
124         int antic_iterations;
125         int insert_iterations;
126         int infinite_loops;
127 } gvnpre_statistics;
128
129 gvnpre_statistics *gvnpre_stats = NULL;
130
131 static void init_stats(void)
132 {
133         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
134 }
135
136 static void free_stats(void)
137 {
138         free(gvnpre_stats);
139         gvnpre_stats = NULL;
140 }
141
142 static void print_stats(void)
143 {
144         gvnpre_statistics *stats = gvnpre_stats;
145         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
146         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
147         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
148         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
149         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
150         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
151         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
152         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
153         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
154         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
155 }
156
157 #define set_stats(var, value) (var)=(value)
158 #define inc_stats(var)        ((var)+=1)
159
160 /* --------------------------------------------------------
161  * Dump valuesets
162  * --------------------------------------------------------
163  */
164
165 /**
166  * Dump a value set.
167  *
168  * @param set    the set to dump
169  * @param txt    a text to describe the set
170  * @param block  the owner block of the set
171  */
172 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
173 {
174         ir_valueset_iterator_t iter;
175         ir_node *value, *expr;
176         int i;
177
178         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
179         i = 0;
180         foreach_valueset(set, value, expr, iter) {
181                 if ((i & 3) == 3)
182                         DB((dbg, LEVEL_2, "\n"));
183                 if (value != expr)
184                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
185                 else
186                         DB((dbg, LEVEL_2, " %+F,", expr));
187                 ++i;
188         }
189         DB((dbg, LEVEL_2, "\n}\n"));
190 }  /* dump_value_set */
191
192 /**
193  * Dump all exp_gen value sets.
194  *
195  * @param list  the list of block infos to retrieve the sets from
196  */
197 static void dump_all_expgen_sets(block_info *list)
198 {
199         block_info *block_info;
200
201         for (block_info = list; block_info != NULL; block_info = block_info->next) {
202                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
203         }
204 }
205
206 #else
207
208 #define dump_all_expgen_sets(list)
209 #define dump_value_set(set, txt, block)
210
211 #endif /* DEBUG_libfirm */
212
213 /* --------------------------------------------------------
214  * GVN Functions
215  * --------------------------------------------------------
216  */
217
218 /**
219  * Compares node collisions in valuetable.
220  * Modified identities_cmp().
221  */
222 static int compare_gvn_identities(const void *elt, const void *key)
223 {
224         ir_node *a = (ir_node *)elt;
225         ir_node *b = (ir_node *)key;
226         int i, irn_arity_a;
227
228         if (a == b) return 0;
229
230         /* phi nodes kill predecessor values and are always different */
231         if (is_Phi(a) || is_Phi(b))
232                 return 1;
233
234         /* memops are not the same, even if we want to optimize them
235            we have to take the order into account */
236         if (is_memop(a) || is_memop(b))
237                 return 1;
238
239         if ((get_irn_op(a) != get_irn_op(b)) ||
240             (get_irn_mode(a) != get_irn_mode(b))) return 1;
241
242         /* compare if a's in and b's in are of equal length */
243         irn_arity_a = get_irn_arity(a);
244         if (irn_arity_a != get_irn_arity(b))
245                 return 1;
246
247         /* blocks are never the same */
248         if (is_Block(a) || is_Block(b))
249                 return 1;
250
251         /* TODO depends on load optimization */
252         if (get_irn_pinned(a) == op_pin_state_pinned) {
253                 /* for pinned nodes, the block inputs must be equal */
254                 if (get_irn_n(a, -1) != get_irn_n(b, -1))
255                         return 1;
256         } else {
257                 /* we need global values independently from their blocks */
258                 assert(get_opt_global_cse());
259         }
260
261         /* compare a->in[0..ins] with b->in[0..ins] */
262         for (i = 0; i < irn_arity_a; ++i) {
263                 ir_node *pred_a = get_irn_n(a, i);
264                 ir_node *pred_b = get_irn_n(b, i);
265                 if (pred_a != pred_b) {
266                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
267                                 return 1;
268                 }
269         }
270
271         /*
272          * here, we already know that the nodes are identical except for
273          * their attributes
274          */
275         if (a->op->ops.node_cmp_attr)
276                 return a->op->ops.node_cmp_attr(a, b);
277
278         return 0;
279 }
280
281 /**
282  * Identify does a lookup in the GVN valuetable.
283  * To be used when no new GVN values are to be created.
284  *
285  * @param irn  a node representing an expression
286  * @return a node representing the value
287  */
288 static ir_node *identify(ir_node *irn)
289 {
290         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
291         if (value)
292                 return value;
293         /* irn represents a new value, so return the leader */
294         return identify_remember(irn);
295 }
296
297 /**
298  * remember() adds node irn to the GVN valuetable.
299  * identify_remember() only identifies values of nodes with the
300  * same predecessor nodes (not values). By creating a node from the predecessor
301  * values/leaders, a true valuetree is built. Phis kill their predecessor value,
302  * so no circular dependencies need to be resolved.
303  *
304  * TODO Improvement:
305  *      Maybe this could be implemented with a custom node hash that takes
306  *      phi nodes and true values (instead of predecessors) into account,
307  *      resulting in value numbers.
308  * TODO This unnecessarily also handles nodes like calls, which are never equal.
309  *
310  * @param irn  a node representing an expression
311  * @return     the value of the expression
312  */
313 static ir_node *remember(ir_node *irn)
314 {
315         int       arity   = get_irn_arity(irn);
316         int       i;
317         int       changed = 0;
318         ir_node **in      = XMALLOCN(ir_node *, arity);
319         ir_node  *value;
320
321         for (i = 0; i < arity; ++i) {
322                 ir_node *pred       = get_irn_n(irn, i);
323                 /* value and leader at the same time */
324                 ir_node *pred_value = identify(pred);
325
326                 /* phi will be translated anyway, so kill the predecessor values
327                    this also prevents circular dependencies */
328                 if (is_Phi(pred)) {
329                         /* every phi represents its own value */
330                         in[i] = pred;
331                         continue;
332                 }
333
334                 /* predecessor is not its value representation/the leader */
335                 if (pred != pred_value)
336                         changed = 1;
337                 in[i] = pred_value;
338         }
339
340         if (changed) {
341                 /* create a representative built over the leaders */
342                 ir_node *nn = new_ir_node(
343                         get_irn_dbg_info(irn),
344                         get_irn_irg(irn),
345                         get_nodes_block(irn),
346                         get_irn_op(irn),
347                         get_irn_mode(irn),
348                         get_irn_arity(irn),
349                         in);
350                 copy_node_attr(environ->graph, irn, nn);
351
352                 /* now the value can be determined because the
353                    predecessors are the leaders */
354                 value = identify_remember(nn);
355         } else {
356                 value = identify_remember(irn);
357         }
358         free(in);
359
360         DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
361         ir_nodehashmap_insert(&value_map, irn, value);
362
363         return value;
364 }
365
366 /** When the value map has been built we may look up expressions
367  *  and remember them if new.
368  */
369 static ir_node *identify_or_remember(ir_node *irn)
370 {
371         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
372         if (value)
373                 return value;
374         else
375                 return remember(irn);
376 }
377
378 /* --------------------------------------------------------
379  * Block info
380  * --------------------------------------------------------
381  */
382
383 /**
384  * Allocate block info for block block.
385  *
386  * @param block   the block
387  * @param env     the environment
388  */
389 static void alloc_block_info(ir_node *block, pre_env *env)
390 {
391         block_info *info = OALLOC(env->obst, block_info);
392
393         set_irn_link(block, info);
394         info->exp_gen    = ir_valueset_new(16);
395         info->avail_out  = ir_valueset_new(16);
396         info->antic_in   = ir_valueset_new(16);
397         info->antic_done = ir_valueset_new(16);
398         info->trans = XMALLOC(ir_nodehashmap_t);
399         ir_nodehashmap_init(info->trans);
400
401         info->new_set = NULL;
402         info->avail   = NULL;
403         info->block   = block;
404         info->found   = 1;
405
406         info->next = env->list;
407         env->list  = info;
408 }  /* alloc_block_info */
409
410 static void free_block_info(block_info *block_info)
411 {
412         ir_valueset_del(block_info->exp_gen);
413         ir_valueset_del(block_info->avail_out);
414         ir_valueset_del(block_info->antic_in);
415         if (block_info->trans) {
416                 ir_nodehashmap_destroy(block_info->trans);
417                 free(block_info->trans);
418         }
419         if (block_info->new_set)
420                 ir_valueset_del(block_info->new_set);
421 }
422
423 /**
424  * Bottom up walker that ensures that every block gets a block info.
425  *
426  * @param irn   the node
427  * @param ctx   the environment
428  */
429 static void block_info_walker(ir_node *irn, void *ctx)
430 {
431         if (is_Block(irn)) {
432                 pre_env *env = (pre_env*)ctx;
433                 alloc_block_info(irn, env);
434         }
435 }
436
437 /**
438  * Returns the block info of a block.
439  */
440 static block_info *get_block_info(ir_node *block)
441 {
442         return (block_info*)get_irn_link(block);
443 }
444
445 /* --------------------------------------------------------
446  * Infinite loop analysis
447  * --------------------------------------------------------
448  */
449
450 /**
451  * Walker to set block marks and loop links to 0.
452  */
453 static void clear_block_mark_loop_link(ir_node *block, void *env)
454 {
455         (void) env;
456
457         if (is_Block(block)) {
458                 set_Block_mark(block, 0);
459                 set_loop_link(get_irn_loop(block), NULL);
460         }
461 }
462
463 /**
464  * Returns non-zero if block is part of the real loop loop.
465  */
466
467 static unsigned in_loop(ir_node *block, ir_loop *loop)
468 {
469         ir_loop *l     = get_irn_loop(block);
470         ir_loop *outer = get_irg_loop(environ->graph);
471
472         while (l != loop) {
473                 /* loop tree root is not a loop */
474                 if (l == NULL || l == outer)
475                         return 0;
476                 l = get_loop_outer_loop(l);
477         }
478         return 1;
479 }
480
481 /**
482  * Returns the outermost real loop of loop.
483  */
484 static ir_loop *get_loop_outermost(ir_loop *loop)
485 {
486         ir_loop *outer = get_irg_loop(environ->graph);
487         ir_loop *l     = loop;
488         ir_loop *last  = NULL;
489
490         while (l != outer) {
491                 last = l;
492                 l = get_loop_outer_loop(l);
493         }
494         return last;
495 }
496
497 /**
498  * Topological bottom-up walker sets the links of infinite loops to non-zero.
499  * Block marks are used to flag blocks reachable (from the end) on the one hand;
500  * on the other hand they are set if the block is not part of an infinite loop.
501  */
502 static void infinite_loop_walker(ir_node *block, void *env)
503 {
504         int arity;
505         int i;
506         (void) env;
507
508         if (! is_Block(block))
509                 return;
510
511         /* start block has no predecessors */
512         if (block == environ->start_block)
513                 return;
514
515         arity = get_irn_arity(block);
516
517         /* block not part of a real loop: no infinite loop */
518         if (get_irn_loop(block) == get_irg_loop(environ->graph))
519                 set_Block_mark(block, 1);
520
521         if (get_Block_mark(block)) {
522                 /* reachable block: mark all cf predecessors */
523                 for (i = 0; i < arity; ++i) {
524                         ir_node *pred = get_Block_cfgpred_block(block, i);
525                         if (is_Bad(pred))
526                                 continue;
527                         set_Block_mark(pred, 1);
528                 }
529         } else {
530                 /* We are in a real loop and see an unreachable block. */
531                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
532
533                 /* flag loop as infinite */
534                 set_loop_link(outermost_loop, outermost_loop);
535                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
536
537                 /* The cf predecessors are unreachable, but can never be part of
538                    an infinite loop, because we just reached them. So we set the
539                    blockmark to prevent triggering the infinite loop detection. */
540
541                 /* passing information to the cf predecessors */
542                 for (i = 0; i < arity; ++i) {
543                         ir_node *pred = get_Block_cfgpred_block(block, i);
544
545                         if (is_Bad(pred))
546                                 continue;
547
548                         /* If our cf predecessor is in the same endless loop,
549                            it is also unreachable. */
550                         if (in_loop(pred, outermost_loop)) {
551                                 set_Block_mark(pred, 0);
552                         } else {
553                                 /* When we leave the unreachable loop, we artificially
554                                    declare the cf predecessor reachable. */
555                                 set_Block_mark(pred, 1);
556                         }
557                 }
558         }
559 }
560
561 /**
562  * Sets loop links of outermost infinite loops to non-zero.
563  */
564 static void analyse_loops(ir_graph *irg)
565 {
566         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
567
568         /* reset block mark and loop links */
569         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
570
571         /* mark end block reachable */
572         set_Block_mark(get_irg_end_block(irg), 1);
573         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
574
575         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
576 }
577
578 #if NO_INF_LOOPS
579 /**
580  * Returns non-zero if block is part of an infinite loop.
581  */
582 static unsigned is_in_infinite_loop(ir_node *block)
583 {
584         ir_loop *loop;
585
586         assert(is_Block(block));
587         loop = get_irn_loop(block);
588         assert(loop);
589
590         loop = get_loop_outermost(loop);
591         if (loop)
592                 return (get_loop_link(loop) != NULL);
593         else
594                 return 0;
595 }
596 #endif
597
598 /* --------------------------------------------------------
599  * GVN-PRE Exp_gen
600  * --------------------------------------------------------
601  */
602
603 /**
604  * Helper function to get the anti leader of node in block.
605  */
606 static ir_node *get_anti_leader(ir_node *node, ir_node *block)
607 {
608         block_info *info   = get_block_info(block);
609         ir_node    *value  = identify(node);
610         ir_node    *leader = ir_valueset_lookup(info->antic_in, value);
611
612         if (leader)
613                 return leader;
614         else
615                 /* this applies in the context of this algorithm */
616                 return node;
617 }
618
619 /**
620  * Returns non-zero if a node is movable and a possible candidate for PRE.
621  */
622 static unsigned is_nice_value(ir_node *n)
623 {
624         ir_mode *mode = get_irn_mode(n);
625
626         if (is_Phi(n))
627                 return 1;
628
629 #if LOADS || DIVMODS
630         if (is_Proj(n) && mode != mode_X && mode != mode_T)
631                 return 1;
632 #else
633         if (is_Proj(n))
634                 return 0;
635 #endif
636
637 #if LOADS
638         if (is_Load(n))
639                 return (get_Load_volatility(n) == volatility_non_volatile);
640 #endif
641
642         if (get_irn_pinned(n) == op_pin_state_pinned)
643                 return 0;
644
645         if (! mode_is_data(mode)) {
646                 if (! is_Div(n) && ! is_Mod(n))
647                         return 0;
648         }
649         return 1;
650 }
651
652 /**
653  * Checks if a node n is clean in block block for exp_gen.
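 * "Clean" here means that every operand of n that lives in block is itself
 * a nice value whose value is already contained in the given valueset;
 * Phi operands and operands from other blocks are ignored.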
654  *
655  * @param n      the node
656  * @param block  the block
657  * @return non-zero value for clean node
658  */
659 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
660 {
661         int         i, arity;
662
663         if (is_Phi(n))
664                 return 1;
665
666         if (! is_nice_value(n))
667                 return 0;
668
669         arity = get_irn_arity(n);
670         for (i = 0; i < arity; ++i) {
671                 ir_node *pred   = get_irn_n(n, i);
672                 ir_node *value;
673
674                 if (is_Phi(pred))
675                         continue;
676
677                 /* we only handle current block */
678                 if (get_nodes_block(pred) != block)
679                         continue;
680
681                 if (! is_nice_value(pred))
682                         return 0;
683
684                 value = identify(pred);
685                 if (! ir_valueset_lookup(valueset, value))
686                         return 0;
687
688         }
689         return 1;
690 }
691
692 /**
693  * Walker that assigns GVN values and fills the avail_out and exp_gen sets.
694  * Assumed to walk blockwise and nodewise in top-down topological order.
695  *
696  * @param irn    the node
697  * @param ctx    the environment
698  */
699 static void topo_walker(ir_node *irn, void *ctx)
700 {
701         ir_node    *block;
702         block_info *info;
703         ir_node    *value;
704         (void) ctx;
705
706         if (is_Block(irn))
707                 return;
708
709         if (! is_nice_value(irn))
710                 return;
711
712         /* GVN step: remember the value. */
713         value = remember(irn);
714
715         if (is_irn_constlike(irn))
716                 return;
717
718         block = get_nodes_block(irn);
719         info  = get_block_info(block);
720
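        /* this expression is available at the end of its own block */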
721         ir_valueset_insert(info->avail_out, value, irn);
722
723         if (is_clean_in_block(irn, block, info->exp_gen)) {
724                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
725
726                 ir_valueset_insert(info->exp_gen, value, irn);
727         }
728 }
729
730 /* --------------------------------------------------------
731  * GVN-PRE Antic_in
732  * --------------------------------------------------------
733  */
734
735 /**
736  * Gets the result of node's phi translation into block.
737  *
738  * @param node   the node
739  * @param block  the target block
740  *
741  * @return the phi translation of node into block, or node itself if it was not translated
742  */
743 static ir_node *get_translated(ir_node *block, ir_node *node)
744 {
745         ir_node *trans;
746         if (is_irn_constlike(node))
747                 return node;
748
749         trans = ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
750         if (trans == NULL)
751                 return node;
752         else
753                 return trans;
754 }
755
756 static ir_node *get_translated_pred(ir_node *block, int pos, ir_node *node)
757 {
758         ir_node *pred_block;
759         if (is_irn_constlike(node))
760                 return node;
761
762         pred_block = get_Block_cfgpred_block(block, pos);
763         return ir_nodehashmap_get(ir_node, get_block_info(pred_block)->trans, node);
764 }
765
766 /**
767  * Saves the result of the phi translation of node into the
768  * translation map of the target block (the predecessor at pos
769  * of block succ).
770  *
771  * @param map    the translation map of the target block
772  * @param node   the node
773  * @param trans  the translation result
774  *
775  */
776 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
777 {
778         if (is_irn_constlike(node))
779                 return;
780         /* insert or replace */
781         ir_nodehashmap_insert(map, node, trans);
782 }
783
784 #if LOADS || DIVMODS
785 /* Helper function to compare the values of pred and avail_pred. */
786 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
787 {
788         ir_node *avail_value  = identify(avail_pred);
789         ir_node *trans_pred   = get_translated_pred(block, pos, pred);
790         ir_node *value;
791
792         if (trans_pred == NULL)
793                 trans_pred = pred;
794         value = identify(trans_pred);
795
796         DB((dbg, LEVEL_3, "manual compare %+F  %+F\n", pred, avail_pred));
797
798         return (value == avail_value);
799 }
800 #endif
801
802 #if LOADS
803 /**
804  * Does phi translation for redundant load nodes only.
805  * Returns NULL for non-redundant loads, which need to be phi translated.
806  * Loads are compared by comparing their pointer values
807  * and ensuring that they are adjacent.
808  * This is equivalent to what phi_translate() does.
809  */
810 static ir_node *phi_translate_load(ir_node *load, ir_node *block, int pos)
811 {
812         ir_node *mem   = get_Load_mem(load);
813         ir_node *trans = get_translated_pred(block, pos, mem);
814
815         if (trans == NULL)
816                 trans = mem;
817
818         /* no partial redundancy if this is a mode_M phi */
819         if (is_Proj(trans)) {
820                 /* The last memory operation in predecessor block */
821                 ir_node *avail_load = get_Proj_pred(trans);
822
823                 /* memop is a load with matching type */
824                 if (is_Load(avail_load) &&
825                             get_Load_mode(load) == get_Load_mode(avail_load)) {
826
827                         unsigned match = match_pred(get_Load_ptr(load), get_Load_ptr(avail_load), block, pos);
828
829                         if (match)
830                                 return avail_load;
831                 }
832         }
833         return NULL;
834 }
835 #endif
836
837 #if DIVMODS
838 /**
839  * Does phi translation for redundant Div/Mod nodes only.
840  * Returns NULL for a non-redundant node, which needs to be phi translated.
841  */
842 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
843 {
844         ir_node *mem   = get_memop_mem(divmod);
845         ir_node *trans = get_translated_pred(block, pos, mem);
846
847         if (trans == NULL)
848                 trans = mem;
849
850         /* no partial redundancy if this is a mode_M phi */
851         if (is_Proj(trans)) {
852                 /* The last memory operation in predecessor block */
853                 ir_node *avail_op = get_Proj_pred(trans);
854
855                 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
856                         unsigned left, right;
857
858                         if (is_Div(avail_op)) {
859                                 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
860                                     get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
861
862                                         left  = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
863                                         right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
864
865                                         if (left && right)
866                                                 return avail_op;
867                                 }
868                         } else if (is_Mod(avail_op)) {
869                             if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
870
871                                         left  = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
872                                         right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
873
874                                         if (left && right)
875                                                 return avail_op;
876                                 }
877                         }
878                 }
879         }
880         return NULL;
881 }
882 #endif
883
884 /**
885  * Translates an expression above a Phi.
886  *
887  * @param node        the node
888  * @param block       the block whose predecessor at pos is the translation target
889  * @param pos         the input number of the destination block
890  *
891  * @return a node representing the translated value
892  */
893 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_node *pred_block)
894 {
895         int       i;
896         int       arity;
897         ir_node **in;
898         ir_node  *target_block;
899         ir_node  *nn;
900         int       needed;
901
902         if (is_Phi(node)) {
903                 if (get_nodes_block(node) == block)
904                         return get_Phi_pred(node, pos);
905                 /* this phi does not need translation */
906                 return node;
907         }
908         arity = get_irn_arity(node);
909
910 #if LOADS
911         if (is_Load(node)) {
912                 ir_node *avail_load = phi_translate_load(node, block, pos);
913                 if (avail_load)
914                         return avail_load;
915         }
916 #endif
917
918 #if DIVMODS
919         if (is_Div(node) || is_Mod(node)) {
920                 ir_node *avail_op = phi_translate_divmod(node, block, pos);
921                 if (avail_op)
922                         return avail_op;
923         }
924 #endif
925
926         needed = 0;
927         in = XMALLOCN(ir_node *, arity);
928
929         /* A value has several representatives. The anti leader is chosen to be
930            the main representative. If we access a node as representative of a
931            value we always use the anti leader. The anti leader can be found by
932            antic_in(identify(node)). */
933         for (i = 0; i < arity; ++i) {
934                 /* get anti leader for pred to lookup its translated value */
935                 ir_node    *pred        = get_irn_n(node, i);
936                 ir_node    *anti_leader = get_anti_leader(pred, block);
937                 /* we cannot find this value in antic_in, because the value
938                    has (possibly) changed! */
939                 ir_node    *pred_trans  = get_translated(pred_block, anti_leader);
940                 ir_node    *expr;
941
942                 if (pred_trans == NULL) {
943                         expr = pred;
944                 } else {
945                         expr = pred_trans;
946                         /* predecessor value changed, so translation is needed */
947                         if (identify(expr) != identify(pred))
948                                 needed |= 1;
949                 }
950
951                 DB((dbg, LEVEL_4, "in %+F\n", expr));
952                 in[i] = expr;
953         }
954
955         if (! needed)
956                 return node;
957
958         DB((dbg, LEVEL_3, "Translate\n"));
959
960         target_block = get_Block_cfgpred_block(block, pos);
961         if (is_Proj(node))
962                 target_block = get_nodes_block(in[0]);
963
964         /* copy node to represent the new value.
965            We do not translate nodes that do not need translation,
966            so we use the newly created nodes as value representatives only.
967            Their block is not important, because we create new ones during
968            insert node phase. */
969         nn = new_ir_node(
970                 get_irn_dbg_info(node),
971                 environ->graph,
972                 target_block,
973                 get_irn_op(node),
974                 get_irn_mode(node),
975                 arity,
976                 in);
977         free(in);
978         /* We need the attribute copy here, because the Hash value of a
979            node might depend on it. */
980         copy_node_attr(environ->graph, node, nn);
981         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
982            because it already uses availability. */
983
984         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
985         return nn;
986 }
987
988 /**
989  * Block-walker, computes Antic_in(block).
990  * Builds a value tree out of the graph by translating values
991  * over phi nodes.
992  *
993  * @param block  the block
994  * @param ctx    the walker environment
995  */
996 static void compute_antic(ir_node *block, void *ctx)
997 {
998         pre_env                *env       = (pre_env*)ctx;
999         block_info             *succ_info;
1000         block_info             *info;
1001         ir_node                *succ;
1002         ir_node                *value;
1003         ir_node                *expr;
1004         size_t                  size;
1005         ir_valueset_iterator_t  iter;
1006         int                     n_succ;
1007
1008         /* filter non-block nodes from the topological walker */
1009         if (! is_Block(block))
1010                 return;
1011
1012         /* the end block has no successor */
1013         if (block == env->end_block)
1014                 return;
1015
1016         info = get_block_info(block);
1017         /* track changes */
1018         size = ir_valueset_size(info->antic_in);
1019         n_succ = get_Block_n_cfg_outs(block);
1020
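        /* Roughly: antic_in(block) consists of the clean expressions of
           exp_gen(block) together with the successor's antic_in phi translated
           into this block (resp. the intersection of the successors' antic_in
           sets if there are several successors). */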
1021         /* add exp_gen */
1022         if (env->first_iter) {
1023 #if NO_INF_LOOPS
1024                 /* keep antic_in of infinite loops empty */
1025                 if (! is_in_infinite_loop(block)) {
1026                         foreach_valueset(info->exp_gen, value, expr, iter) {
1027                                 ir_valueset_insert(info->antic_in, value, expr);
1028                         }
1029                 }
1030 #else
1031                 foreach_valueset(info->exp_gen, value, expr, iter) {
1032                         ir_valueset_insert(info->antic_in, value, expr);
1033                 }
1034 #endif
1035         }
1036
1037         /* successor might have phi nodes */
1038         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1039                 succ      = get_Block_cfg_out(block, 0);
1040                 int pos   = get_Block_cfgpred_pos(succ, block);
1041                 succ_info = get_block_info(succ);
1042
1043                 /* initialize translated set */
1044                 if (env->first_iter) {
1045                         info->trans = XMALLOC(ir_nodehashmap_t);
1046                         ir_nodehashmap_init(info->trans);
1047                 }
1048
1049                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1050                         ir_node *trans       = phi_translate(expr, succ, pos, block);
1051                         /* create new value if necessary */
1052                         ir_node *trans_value = identify_or_remember(trans);
1053                         ir_node *represent;
1054
1055                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1056
1057                         /* on value change (phi present) we need the translated node
1058                            to represent the new value for possible further translation. */
1059                         if (value != trans_value)
1060                                 represent = trans;
1061                         else
1062                                 represent = expr;
1063
1064                         if (is_clean_in_block(expr, block, info->antic_in))
1065                                 ir_valueset_replace(info->antic_in, trans_value, represent);
1066                         set_translated(info->trans, expr, represent);
1067                 }
1068
1069         } else if (n_succ > 1) {
1070                 int         i;
1071                 ir_node    *common     = NULL;
1072                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1073                 block_info *succ0_info = get_block_info(succ0);
1074
1075                 /* intersection of the successors' antic_in sets */
1076                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1077                         /* iterate over remaining successors */
1078                         for (i = 1; i < n_succ; ++i) {
1079                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1080                                 block_info *succ_info = get_block_info(succ);
1081
1082                                 /* value in antic_in? */
1083                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1084                                 if (common == NULL)
1085                                         break;
1086                         }
1087
1088                         if (common && is_clean_in_block(expr, block, info->antic_in))
1089                                 ir_valueset_replace(info->antic_in, value, expr);
1090                 }
1091         }
1092
1093
1094         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1095
1096         if (size != ir_valueset_size(info->antic_in))
1097                 env->changes |= 1;
1098 }
1099
1100 /* --------------------------------------------------------
1101  * Main algorithm Avail_out
1102  * --------------------------------------------------------
1103  */
1104
1105 /**
1106  * Computes Avail_out(block):
1107  *
1108  * Avail_in(block)  = Avail_out(dom(block))
1109  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1110  *
1111  * Precondition:
1112  *  This function must be called in the top-down topological order:
1113  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1114  *
1115  * @param block   the block
1116  * @param ctx     walker context
1117  */
1118 static void compute_avail_top_down(ir_node *block, void *ctx)
1119 {
1120         pre_env    *env   = (pre_env*)ctx;
1121         block_info *info;
1122
1123         if (block == env->end_block)
1124                 return;
1125
1126         info  = get_block_info(block);
1127
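        /* the block's own nodes were already put into avail_out by topo_walker() */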
1128         /* Add all nodes from the immediate dominator.
1129            This ensures that avail_out contains the leader. */
1130         if (block != env->start_block) {
1131                 ir_node                *dom_block = get_Block_idom(block);
1132                 block_info             *dom_info  = get_block_info(dom_block);
1133                 ir_node                *value;
1134                 ir_node                *expr;
1135                 ir_valueset_iterator_t  iter;
1136
1137                 foreach_valueset(dom_info->avail_out, value, expr, iter)
1138                         /* replace: use the leader from dominator, not local exp_gen */
1139                         ir_valueset_replace(info->avail_out, value, expr);
1140         }
1141
1142         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1143 }
1144
1145 /* --------------------------------------------------------
1146  * Main algorithm redundancy detection
1147  * --------------------------------------------------------
1148  */
1149
1150 /**
1151  * Returns a valid mode if the value of expr is a partially redundant value.
1152  *
1153  * @param block   the block
1154  * @param expr    the expression
1155  * @param value   the value of the expression
1156  * @return mode of the expression if it is partially redundant else NULL
1157  */
1158 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1159 {
1160         ir_node *first_avail         = NULL;
1161         int      pos;
1162         int      arity               = get_irn_arity(block);
1163         int      fully_redundant     = 1;
1164         int      partially_redundant = 0;
1165         ir_mode *mode                = NULL;
1166
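        /* The value is partially redundant if it is available in at least one
           predecessor and fully redundant if the same available expression is
           found along every predecessor. */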
1167         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1168
1169         /* for each predecessor blocks */
1170         for (pos = 0; pos < arity; ++pos) {
1171                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1172                 block_info *pred_info;
1173                 ir_node    *trans_expr;
1174                 ir_node    *trans_value;
1175                 ir_node    *avail_expr;
1176
1177                 /* ignore bad blocks. */
1178                 if (is_Bad(pred_block))
1179                         continue;
1180
1181                 pred_info  = get_block_info(pred_block);
1182                 trans_expr = get_translated(pred_block, expr);
1183                 trans_value = identify(trans_expr);
1184
1185                 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1186                 /* value might be available through a not yet existing constant */
1187                 if (avail_expr == NULL && is_Const(trans_expr)) {
1188                         /* limit range of new constants */
1189                         ir_mode   *cmode = get_irn_mode(trans_expr);
1190                         ir_tarval *upper = new_tarval_from_long(127, cmode);
1191                         ir_tarval *lower = new_tarval_from_long(-127, cmode);
1192                         ir_tarval *c     = get_Const_tarval(trans_expr);
1193
1194                         if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
1195                                 tarval_cmp(c, upper) != ir_relation_greater_equal) {
1196                                 avail_expr = NULL;
1197                         } else {
1198                                 avail_expr = trans_expr;
1199                         }
1200                 }
1201
1202                 DB((dbg, LEVEL_3, "avail_expr %+F  trans_expr %+F\n", avail_expr, trans_expr));
1203
1204                 if (avail_expr == NULL) {
1205                         pred_info->avail = trans_expr;
1206                         pred_info->found = 0;
1207                         fully_redundant  = 0;
1208                 } else {
1209                         /* expr is available */
1210                         pred_info->avail    = avail_expr;
1211                         pred_info->found    = 1;
1212                         mode                = get_irn_mode(avail_expr);
1213                         partially_redundant = 1;
1214
1215                         if (first_avail == NULL)
1216                                 first_avail = avail_expr;
1217                         else if (first_avail != avail_expr)
1218                                 /* Multiple different expressions are available */
1219                                 fully_redundant = 0;
1220
1221                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1222                 }
1223         }
1224
1225 #if BETTER_GREED
1226         /* value is redundant from last iteration,
1227            but has not been removed from antic_in (is not optimized) */
1228         if (! environ->first_iter && is_redundant(block, expr))
1229                 return mode;
1230 #endif
1231
1232         /* If it is not the same value already existing along every predecessor
1233            and it is defined by some predecessor then it is partially redundant. */
1234         if (! partially_redundant || fully_redundant)
1235                 return NULL;
1236         return mode;
1237 }
1238
1239 /**
1240  * Updates the new_set of a block by adding the new_set of
1241  * the immediate dominating block.
1242  * @param block  the block
1243  * @param idom   the immediate dominator of block
1244  */
1245 static void update_new_set(ir_node *block, ir_node *idom)
1246 {
1247         ir_node                *value;
1248         ir_node                *expr;
1249         ir_valueset_iterator_t  iter;
1250         block_info             *curr_info = get_block_info(block);
1251         block_info             *idom_info = get_block_info(idom);
1252         int                     updated   = 0;
1253
1254         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1255         foreach_valueset(idom_info->new_set, value, expr, iter) {
1256                 /* inherit new_set from immediate dominator */
1257                 ir_valueset_insert(curr_info->new_set, value, expr);
1258                 /* replace in avail_out */
1259                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1260         }
1261 #ifdef DEBUG_libfirm
1262         if (updated)
1263                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1264 #endif
1265 } /* update_new_set */
1266
1267 #if BETTER_GREED
1268 /*
1269  * Returns redundant flag of node irn in block block.
1270  */
1271 static unsigned is_redundant(ir_node *block, ir_node *irn)
1272 {
1273         (void) block;
1274         (void) irn;
1275
1276         /* TODO Needs to use a flag, because antic_done should only be used
1277            if node is finally processed by insert_nodes. */
1278         return 0;
1279 }
1280 #endif
1281
1282 /**
1283  * Checks if hoisting irn is greedy.
1284  * Greedy hoisting means that non-partially-redundant nodes get
1285  * hoisted. This happens if a partially redundant node has
1286  * non-redundant predecessors.
1287  */
1288 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1289 {
1290         int block_arity = get_irn_arity(block);
1291         int arity = get_irn_arity(irn);
1292         int pos, i;
1293
1294         /* As long as the predecessor values are available in all predecessor blocks,
1295            we can hoist this value. */
1296         for (pos = 0; pos < block_arity; ++pos) {
1297                 ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1298                 block_info *pred_info  = get_block_info(pred_block);
1299
1300                 for (i = 0; i < arity; ++i) {
1301                         ir_node *pred        = get_irn_n(irn, i);
1302                         ir_node *value       = identify(pred);
1303
1304                         if (is_irn_constlike(value))
1305                                 continue;
1306
1307                         /* predecessor is on the current path, so we have to ensure redundancy */
1308                         if (ir_valueset_lookup(pred_info->avail_out, value) == NULL)
1309                                 return 1;
1310                 }
1311         }
1312         return 0;
1313 }
1314
1315 /**
1316  * Perform insertion of partially redundant values.
1317  * For every block node, do the following:
1318  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1319  * If the block has multiple predecessors,
1320  *     2a. Iterate over the ANTIC expressions for the block to see if
1321  *         any of them are partially redundant.
1322  *     2b. If so, insert them into the necessary predecessors to make
1323  *         the expression fully redundant.
1324  *     2c. Insert a new Phi merging the values of the predecessors.
1325  *     2d. Insert the new Phi, and the new expressions, into the
1326  *         NEW_SETS set.
1327  *
1328  * @param block  the block
1329  * @param ctx    the walker environment
1330  */
1331 static void insert_nodes_walker(ir_node *block, void *ctx)
1332 {
1333         pre_env                *env    = (pre_env*)ctx;
1334         int                     arity  = get_irn_arity(block);
1335         ir_node                *value;
1336         ir_node                *expr;
1337         block_info             *info;
1338         ir_node                *idom;
1339         int                     pos;
1340         ir_valueset_iterator_t  iter;
1341 #if BETTER_GREED
1342         plist_t *stack;
1343 #endif
1344
1345         /* only blocks */
1346         if (! is_Block(block))
1347                 return;
1348
1349         /* ensure that even the start block has a new_set */
1350         info = get_block_info(block);
1351         if (info->new_set)
1352                 ir_valueset_del(info->new_set);
1353         info->new_set = ir_valueset_new(16);
1354
1355         if (block == env->start_block)
1356                 return;
1357
1358         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1359
1360         idom = get_Block_idom(block);
1361         update_new_set(block, idom);
1362
1363         /* process only path joining blocks */
1364         if (arity < 2) {
1365                 return;
1366         }
1367
1368 #if BETTER_GREED
1369         stack = plist_new();
1370         foreach_valueset(info->antic_in, value, expr, iter) {
1371                 /* inverse topological order */
1372                 plist_insert_front(stack, expr);
1373         }
1374 #endif
1375
1376         /* This is the main reason antic_in is preferred over antic_out;
1377            we may iterate over every anticipated value first and not
1378            over the predecessor blocks. */
1379         foreach_valueset(info->antic_in, value, expr, iter) {
1380                 ir_mode  *mode;
1381                 ir_node  *phi;
1382                 ir_node **phi_in;
1383
1384                 /* already done? */
1385                 if (ir_valueset_lookup(info->antic_done, value))
1386                         continue;
1387
1388                 /* filter phi nodes from antic_in */
1389                 if (is_Phi(expr))
1390                         continue;
1391
1392                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1393
1394                 /* A value computed in the dominator is totally redundant.
1395                    Hence we have nothing to insert. */
1396                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1397                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1398                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1399
1400                         continue;
1401                 }
1402
1403 #if !BETTER_GREED
1404                 if (is_hoisting_greedy(expr, block)) {
1405                         DB((dbg, LEVEL_2, "greedy\n"));
1406                         continue;
1407                 }
1408 #endif
1409
1410                 mode = is_partially_redundant(block, expr, value);
1411                 if (mode == NULL)
1412                         continue;
1413
1414 #if BETTER_GREED
1415                 if (is_hoisting_greedy(expr, block)) {
1416                         DB((dbg, LEVEL_2, "Better greed: greedy\n"));
1417                         continue;
1418                 }
1419 #endif
1420
1421 #if LOADS || DIVMODS
1422                 /* save old mode_M phis to remove keepalive edges later */
1423                 if (is_memop(expr)) {
1424                         ir_node *mem = get_memop_mem(expr);
1425                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1426                                 ir_nodeset_insert(env->keeps, mem);
1427                         }
1428                 }
1429 #endif
1430
1431 #ifdef DEBUG_libfirm
1432                 if (! is_Proj(expr)) {
1433                         if (env->first_iter)
1434                                 inc_stats(gvnpre_stats->first_iter_found);
1435                         inc_stats(gvnpre_stats->partially);
1436                 }
1437                 if (is_Load(expr))
1438                         inc_stats(gvnpre_stats->loads);
1439                 else if (is_Div(expr) || is_Mod(expr))
1440                         inc_stats(gvnpre_stats->divmods);
1441 #endif
1442
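                /* Make the value fully redundant: collect one available
                   expression per predecessor as operand for a new Phi. */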
1443                 phi_in = XMALLOCN(ir_node *, arity);
1444
1445                 /* for each predecessor block */
1446                 for (pos = 0; pos < arity; ++pos) {
1447                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1448                         block_info *pred_info;
1449
1450                         /* ignore bad blocks. */
1451                         if (is_Bad(pred_block)) {
1452                                 ir_graph *irg = get_irn_irg(pred_block);
1453                                 phi_in[pos] = new_r_Bad(irg, mode);
1454                                 continue;
1455                         }
1456                         pred_info = get_block_info(pred_block);
1457
1458                         if (! pred_info->found) {
1459                                 ir_node *trans = get_translated(pred_block, expr);
1460                                 assert(trans);
1461
1462                                 if (! block_dominates(get_nodes_block(trans), pred_block)) {
1463                                         int       trans_arity = get_irn_arity(trans);
1464                                         int       i;
1465                                         ir_node **in          = XMALLOCN(ir_node *, trans_arity);
1466                                         ir_node  *nn;
1467                                         /* non-Phi predecessors can also be redundant, but have
1468                                            not yet been translated and copied into the target block. */
1469                                         for (i = 0; i < trans_arity; ++i) {
1470                                                 ir_node *pred  = get_irn_n(trans, i);
1471                                                 ir_node *avail = ir_valueset_lookup(pred_info->avail_out, identify(pred));
1472                                                 if (! avail)
1473                                                         avail = pred;
1474                                                 /* phi translate uses anti leader, we need the leader */
1475                                                 in[i] = avail;
1476                                         }
1477                                         nn = new_ir_node(
1478                                                 get_irn_dbg_info(trans),
1479                                                 environ->graph,
1480                                                 pred_block,
1481                                                 get_irn_op(trans),
1482                                                 get_irn_mode(trans),
1483                                                 trans_arity,
1484                                                 in);
1485                                         free(in);
1486
1487                                         identify_or_remember(nn);
1488                                         set_translated(pred_info->trans, expr, nn);
1489                                         DB((dbg, LEVEL_3, "Translation during insert: trans %+F\n", nn));
1490
1491                                         trans = nn;
1492                                 }
1493
1494                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F(%+F) not available\n", trans, pred_block, expr, value));
1495
1496                                 /* value is now available in target block through trans
1497                                    insert (not replace) because it was not available before */
1498                                 ir_valueset_insert(pred_info->avail_out, value, trans);
1499
1500                                 phi_in[pos] = trans;
1501                         } else {
1502                                 /* value available */
1503                                 phi_in[pos] = pred_info->avail;
1504                         }
1505                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1506                 }
1507
1508                 /* We do not create Phis for mode_T (tuple) nodes; their values get
1509                    connected through the Phis of the corresponding Proj nodes. */
1510                 if (get_irn_mode(expr) != mode_T) {
1511
1512                         phi = new_r_Phi(block, arity, phi_in, mode);
1513                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1514
1515                         /* this value is now available through the new phi */
1516                         ir_valueset_replace(info->avail_out, value, phi);
1517                         ir_valueset_insert(info->new_set, value, phi);
1518                 }
1519                 free(phi_in);
1520
1521                 /* already optimized this value in this block */
1522                 ir_valueset_insert(info->antic_done, value, expr);
1523                 env->changes |= 1;
1524         }
1525
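        /* Illustrative sketch of what the loop above does (not from the paper,
         * just the intended transformation): if the value of a+b is available
         * in only one predecessor of a join block,
         *
         *     B1: x = a + b      B2: (a + b missing)
         *            \            /
         *            B3: y = a + b
         *
         * a translated copy of a+b is created in B2 and a Phi(x, copy) in B3,
         * so the computation in B3 becomes fully redundant and is later
         * removed by eliminate(). */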
1526 #if BETTER_GREED
1527         /* TODO Unfinished
1528            Better greed first determines which values are redundant and only
1529            then decides which ones to take.
1530            insert_nodes needs to be split up for that. The cycle could be,
1531            for each block: flag redundant nodes,
1532            use a heuristic to adjust these flags (also considering antic_done),
1533            then do the actual node insertion.
1534            This way we could hoist a non-redundant node
1535            if all of its successors are redundant.
1536            Or we might try to minimize the cut between hoisted nodes and their
1537            non-redundant successors.
1538          */
1539         if (env->changes) {
1540                 plist_element_t *it;
1541                 /* iterate in inverse topological order */
1542                 foreach_plist(stack, it) {
1543                         ir_node *irn   = (ir_node *)plist_element_get_value(it);
1544                         ir_node *block = get_nodes_block(irn);
1545                         int      j;
1546                         char     redundant = 1;
1547
1548                         /* does irn only have redundant successors? */
1549
1550                         assert(get_irn_outs_computed(irn));
1551
1552                         for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
1553                                 ir_node *succ = get_irn_out(irn, j);
1554
1555                                 /* if succ and irn are in the same block */
1556                                 if (get_nodes_block(succ) == block && is_redundant(block, succ)) {
1557                                         continue;
1558                                 } else {
1559                                         redundant = 0;
1560                                         break;
1561                                 }
1562                         }
1563
1564                         if (redundant)
1565                                 flag_redundant(irn, 1);
1566                 }
1567         }
1568         plist_free(stack);
1569 #endif
1570
1571 }
1572
1573 #if HOIST_HIGH
1574 static void update_new_set_walker(ir_node *block, void *ctx)
1575 {
1576         pre_env *env = (pre_env*)ctx;
1577
1578         if (! is_Block(block))
1579                 return;
1580         if (block == env->start_block)
1581                 return;
1582
1583         update_new_set(block, get_Block_idom(block));
1584 }
1585
1586 /**
1587  * Domtree block walker to insert nodes into the highest
1588  * possible block, hoisting already hoisted values even higher
1589  * if their operands die.
1590  */
1591 static void hoist_high(ir_node *block, void *ctx)
1592 {
1593         pre_env                *env        = (pre_env*)ctx;
1594         block_info             *curr_info;
1595         ir_valueset_iterator_t  iter;
1596         ir_node                *expr;
1597         ir_node                *value;
1598         int                     arity      = get_irn_arity(block);
1599
1600         if (! is_Block(block))
1601                 return;
1602
1603         curr_info = get_block_info(block);
1604
1605         if (curr_info->new_set)
1606                 ir_valueset_del(curr_info->new_set);
1607         curr_info->new_set = ir_valueset_new(16);
1608
1609         if (block == env->start_block)
1610                 return;
1611
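        /* values are only inserted at join points, so blocks with fewer than
           two predecessors cannot have anything to hoist */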
1612         if (arity < 2)
1613                 return;
1614
1615         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1616
1617         /* foreach entry optimized by insert node phase */
1618         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1619                 int pos;
1620
1621                 if (is_memop(expr) || is_Proj(expr))
1622                         continue;
1623
1624                 DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
1625
1626                 /* visit hoisted expressions */
1627                 for (pos = 0; pos < arity; ++pos) {
1628                         /* standard target is predecessor block */
1629                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1630                         block_info *pred_info  = get_block_info(target);
1631                         ir_node    *avail;
1632                         ir_node    *new_target;
1633                         ir_node    *trans_expr;
1634                         ir_node    *trans_value;
1635                         ir_node    *dom;
1636                         int         avail_arity;
1637                         int         i;
1638                         unsigned    nest_depth;
1639                         block_info *dom_info;
1640
1641                         /* get phi translated value */
1642                         trans_expr  = get_translated(target, expr);
1643                         trans_value = identify(trans_expr);
1644                         avail       = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1645
1646                         /* get the used expr on this path */
1647
1648                         /* TODO when does this happen? */
1649                         if (avail == NULL)
1650                                 continue;
1651
1652                         avail_arity = get_irn_arity(avail);
1653
1654                         value = identify(avail);
1655
1656                         /* anticipation border */
1657                         new_target  = NULL;
1658                         nest_depth  = get_loop_depth(get_irn_loop(target));
1659                         dom         = target;
1660
1661                         while (dom && dom != environ->start_block) {
1662
1663                                 dom = get_Block_idom(dom);
1664                                 dom_info = get_block_info(dom);
1665                                 DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
1666
1667                                 /* TODO antic_in means hoistable above block,
1668                                    but we need 'hoistable into block'. */
1669
1670                                 /* check if the available node is still anticipated and clean */
1671                                 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1672                                         DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
1673                                         break;
1674                                 }
1675
1676                                 /* Do not hoist into a loop nested deeper than the original
1677                                    target block; nest_depth still holds the loop depth of
1678                                    the target computed before this walk. */
1679                                 if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
1680                                         DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
1681                                         /* not a suitable location */
1682                                         continue;
1683                                 }
1684
1685                                 /* check whether the operands of avail die on this path, i.e.
1686                                    have no users of a different value in blocks dominated by dom */
1687
1688                                 for (i = 0; i < avail_arity; i++) {
1689                                         ir_node *pred       = get_irn_n(avail, i);
1690                                         ir_node *pred_value = identify(pred);
1691                                         int      j;
1692
1693                                         if (dom == NULL)
1694                                                 break;
1695
1696                                         DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
1697
1698                                         if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1699                                                 DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
1700                                                 dom = NULL;
1701                                                 break;
1702                                         }
1703
1704                                         assert(get_irn_outs_computed(pred));
1705
1706                                         /* check every successor */
1707                                         for (j = get_irn_n_outs(pred) - 1; j >= 0; --j) {
1708                                                 ir_node *succ = get_irn_out(pred, j);
1709                                                 DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
1710
1711                                                 /* check only successors on current path to end */
1712                                                 if (block_dominates(dom, get_nodes_block(succ))) {
1713                                                         ir_node *succ_value = identify(succ);
1714
1715                                                         /* Do we have another user than avail?
1716                                                            Then predecessor is not dead after removal of avail. */
1717                                                         if (succ_value != value) {
1718                                                                 DB((dbg, LEVEL_4, "fail: still used in %+F\n", succ));
1719                                                                 dom = NULL;
1720                                                                 break;
1721                                                         }
1722                                                 }
1723                                         }
1724                                 }
1725                                 if (dom)
1726                                         new_target = dom;
1727                         }
1728
1729                         if (new_target) {
1730                                 block_info *target_info = get_block_info(new_target);
1731                                 int         nn_arity    = get_irn_arity(avail);
1732                                 ir_node   **in          = XMALLOCN(ir_node *, nn_arity);
1733                                 ir_node    *nn;
1734                                 int         i;
1735
1736                                 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1737                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1738
1739                                 for (i = 0; i < nn_arity; ++i) {
1740                                         ir_node *pred       = get_irn_n(avail, i);
1741                                         ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
1742                                         assert(avail_pred);
1743                                         in[i] = avail_pred;
1744                                 }
1745                                 nn = new_ir_node(
1746                                         get_irn_dbg_info(avail),
1747                                         environ->graph,
1748                                         new_target,
1749                                         get_irn_op(avail),
1750                                         get_irn_mode(avail),
1751                                         nn_arity,
1752                                         in);
1753                                 free(in);
1754
1755                                 identify_or_remember(nn);
1756                                 /* NOTE: Nodes are inserted into a dominating block and should
1757                                    be available from this point on. Currently we do not push
1758                                    the availability information through during the walk. */
1759                                 ir_valueset_insert(target_info->new_set, value, nn);
1760                         }
1761                 }
1762         }
1763 }
1764 #endif
1765
1766 /* --------------------------------------------------------
1767  * Elimination of fully redundant nodes
1768  * --------------------------------------------------------
1769  */
1770
1771 /**
1772  * Walker which finds redundant nodes using avail_out sets
1773  * and exchanges them for existing ones.
1774  * We cannot change the graph here as this would affect
1775  * the hash values of the nodes.
1776  *
1777  * @param irn  the node
1778  * @param ctx  the walker environment
1779  */
1780 static void eliminate(ir_node *irn, void *ctx)
1781 {
1782         pre_env *env = (pre_env*)ctx;
1783
1784         if (! is_Block(irn)) {
1785                 ir_node    *block = get_nodes_block(irn);
1786                 block_info *info  = get_block_info(block);
1787                 ir_node    *value = identify(irn);
1788
1789                 if (value != NULL) {
1790                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1791
1792                         if (expr != NULL && expr != irn) {
1793                                 elim_pair *p = OALLOC(env->obst, elim_pair);
1794
1795 #if PARTLY_ONLY
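                                /* debugging aid: only eliminate redundancies whose leader was
                                   created by this pass (node index above last_idx) */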
1796                                 if (get_irn_idx(expr) <= env->last_idx)
1797                                         return;
1798 #endif
1799
1800                                 p->old_node = irn;
1801                                 p->new_node = expr;
1802                                 p->next     = env->pairs;
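                                /* nodes created during the insert phase have an index above
                                   last_idx, so a new leader indicates a partial redundancy */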
1803                                 if (get_irn_idx(expr) > env->last_idx)
1804                                         p->reason = FS_OPT_GVN_PARTLY;
1805                                 else
1806                                         p->reason = FS_OPT_GVN_FULLY;
1807                                 env->pairs = p;
1808                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1809                         }
1810                 }
1811         }
1812 }  /* eliminate */
1813
1814 /**
1815  * Do all the recorded changes and optimize newly created Phis.
1816  *
1817  * @param pairs  list of elimination pairs
1818  * @param keeps  set of mode_M Phis whose keep-alive edges get removed
1819  */
1820 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1821 {
1822         elim_pair             *p;
1823         ir_nodeset_iterator_t  iter;
1824         ir_node               *m_phi;
1825         ir_node               *end    = environ->end_node;
1826
1827         for (p = pairs; p != NULL; p = p->next) {
1828                 /* might be already changed */
1829                 p->new_node = skip_Id(p->new_node);
1830
1831                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1832
1833                 /* PRE tends to create Phi(self, self, ..., x, self, ...): every input but
1834                  * one is the node being replaced, so the Phi collapses to x. */
1835                 if (is_Phi(p->new_node)) {
1836                         int      i;
1837                         ir_node *res = NULL;
1838
1839                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1840                                 ir_node *pred = get_irn_n(p->new_node, i);
1841
1842                                 if (pred != p->old_node) {
1843                                         if (res) {
1844                                                 res = NULL;
1845                                                 break;
1846                                         }
1847                                         res = pred;
1848                                 }
1849                         }
1850                         if (res) {
1851                                 exchange(p->new_node, res);
1852                                 p->new_node = res;
1853                         }
1854                 }
1855                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1856
1857                 exchange(p->old_node, p->new_node);
1858         }
1859
1860         /* remove keep alive edges of unused mode_M phis */
1861         foreach_ir_nodeset(keeps, m_phi, iter) {
1862                 remove_End_keepalive(end, m_phi);
1863         }
1864 }  /* eliminate_nodes */
1865
1866 /* --------------------------------------------------------
1867  * GVN PRE pass
1868  * --------------------------------------------------------
1869  */
1870
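/*
 * Rough phase structure of the pass below (informal summary, loosely
 * following VanDrunen/Hosking 2004):
 *   1. exp_gen[b]:   values locally computed in block b (topo_walker)
 *   2. avail_out[b]: leaders available at the end of b, i.e. avail_out of the
 *                    immediate dominator plus b's own expressions
 *                    (compute_avail_top_down)
 *   3. antic_in[b]:  values anticipated at the entry of b, a backward fixpoint
 *                    over the phi-translated successor sets (compute_antic)
 *   4. insert:       make partially redundant values fully redundant by
 *                    inserting translated copies and Phis (insert_nodes_walker,
 *                    optionally followed by hoist_high)
 *   5. eliminate:    replace each node by its available leader (eliminate,
 *                    eliminate_nodes)
 */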
1871 /**
1872  * Gvn_Pre algorithm.
1873  * @param irg   the graph
1874  * @param env   the pre environment
1875  */
1876 static void gvn_pre(ir_graph *irg, pre_env *env)
1877 {
1878         unsigned              antic_iter;
1879         unsigned              insert_iter;
1880
1881         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1882
1883         /* allocate block info */
1884         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1885
1886         ir_nodehashmap_init(&value_map);
1887
1888         /* generate exp_gen */
1889         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1890         dump_all_expgen_sets(env->list);
1891
1892         /* compute the avail_out sets for all blocks */
1893         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1894
1895         /* compute the anticipated value sets for all blocks */
1896         antic_iter      = 0;
1897         env->first_iter = 1;
1898
1899         /* antic_in passes */
1900         do {
1901                 ++antic_iter;
1902                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1903                 env->changes = 0;
1904                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1905                 env->first_iter = 0;
1906                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1907         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1908
1909         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1910
1911         ir_nodeset_init(env->keeps);
1912         insert_iter       = 0;
1913         env->first_iter   = 1;
1914         env->last_idx     = get_irg_last_idx(irg);
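        /* remember the highest node index before insertion; eliminate() uses it
           to tell full from partial redundancies */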
1915         /* compute redundant expressions */
1916         do {
1917                 ++insert_iter;
1918                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1919                 env->changes = 0;
1920                 /* TODO topologically top down would be better; fewer iterations. */
1921                 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
1922                 env->first_iter = 0;
1923                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1924         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1925         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1926
1927 #if HOIST_HIGH
1928         /* An attempt to reduce lifetime by hoisting already hoisted values
1929            even higher, if their operands die. */
1930         dom_tree_walk_irg(irg, hoist_high, NULL, env);
1931         /* update avail_out for elimination */
1932         dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
1933 #endif
1934
1935
1936
1937         /* last step: eliminate nodes */
1938         irg_walk_graph(irg, NULL, eliminate, env);
1939         eliminate_nodes(env->pairs, env->keeps);
1940
1941         ir_nodeset_destroy(env->keeps);
1942 }
1943
1944 /**
1945  * Gvn_Pre pass for graph irg.
1946  *
1947  * @param irg   the graph
1948  */
1949 void do_gvn_pre(ir_graph *irg)
1950 {
1951         struct obstack        obst;
1952         pre_env               env;
1953         ir_nodeset_t          keeps;
1954         optimization_state_t  state;
1955         block_info           *block_info;
1956
1957         /* Bads and unreachable code cause too much trouble with dominance,
1958            so consistent dominance (and outs) information is required.
1959            Loop info is needed for endless loop detection;
1960            the absence of critical edges is a GVN-PRE precondition.
1961          */
1962         assure_irg_properties(irg,
1963                 IR_GRAPH_PROPERTY_NO_BADS
1964                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
1965                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
1966                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1967                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1968                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
1969
1970         /* register a debug mask */
1971         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1972
1973         save_optimization_state(&state);
1974         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1975
1976         environ = &env;
1977         DEBUG_ONLY(init_stats();)
1978
1979         /* setup environment */
1980         obstack_init(&obst);
1981         env.graph        = irg;
1982         env.obst         = &obst;
1983         env.list         = NULL;
1984         env.start_block  = get_irg_start_block(irg);
1985         env.end_block    = get_irg_end_block(irg);
1986         env.end_node     = get_irg_end(irg);
1987         env.pairs        = NULL;
1988         env.keeps        = &keeps;
1989
1990         /* Detect and set links of infinite loops to non-zero. */
1991         analyse_loops(irg);
1992
1993         /* Switch on GCSE. We need it to correctly compute
1994            the value of a node, which is independent of
1995            its block. */
1996         set_opt_global_cse(1);
1997         /* replace the value table (cf. new_identities) with our own compare function */
1998         if (irg->value_table != NULL)
1999                 del_pset(irg->value_table);
2000         /* start with an assumed pset size of 512 nodes */
2001         irg->value_table = new_pset(compare_gvn_identities, 512);
2002
2003         /* do GVN-PRE pass */
2004         gvn_pre(irg, &env);
2005         DEBUG_ONLY(print_stats();)
2006
2007         /* clean up: delete all sets */
2008         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2009                 free_block_info(block_info);
2010         }
2011
2012         DEBUG_ONLY(free_stats();)
2013         ir_nodehashmap_destroy(&value_map);
2014         obstack_free(&obst, NULL);
2015         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2016
2017         /* Pin the graph again.
2018            This is needed due to the use of set_opt_global_cse(1) */
2019         set_irg_pinned(irg, op_pin_state_pinned);
2020         restore_optimization_state(&state);
2021         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
2022
2023         /* TODO there seem to be optimizations that try to use the existing
2024            value_table. */
2025         new_identities(irg);
2026 }
2027
2028 /* Creates an ir_graph pass for do_gvn_pre. */
2029 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2030 {
2031         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
2032 }
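
#if 0
/* Minimal usage sketch (illustrative only, guarded out of the build):
 * run GVN-PRE on every graph of the current program.  It assumes driver code
 * where irprog.h is available; alternatively the pass constructor
 * do_gvn_pre_pass() above can be handed to a pass manager. */
#include "irprog.h"

static void run_gvn_pre_on_all_graphs(void)
{
	size_t i;
	for (i = 0; i < get_irp_n_irgs(); ++i) {
		/* do_gvn_pre() establishes all required graph properties itself */
		do_gvn_pre(get_irp_irg(i));
	}
}
#endif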