better available constant handling
[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Global Value Numbering Partial Redundancy Elimination
23  *          (VanDrunen Hosking 2004)
24  * @author  Michael Beck
26  */
27 #include "config.h"
28
29 #include "debug.h"
30 #include "ircons.h"
31 #include "irdom.h"
32 #include "iredges.h"
33 #include "irflag.h"
34 #include "irgmod.h"
35 #include "irgopt.h"
36 #include "irgwalk.h"
37 #include "irnodehashmap.h"
38 #include "irnodeset.h"
39 #include "iropt_dbg.h"
40 #include "iroptimize.h"
41 #include "irouts.h"
42 #include "irpass.h"
43 #include "valueset.h"
44 #include "irloop.h"
45
46 #include "irgraph_t.h"
47 #include "irnode_t.h"
48 #include "iropt_t.h"
49 #include "plist.h"
50
51 #define MAX_ANTIC_ITER 10
52 #define MAX_INSERT_ITER 3
53
54 #define HOIST_HIGH 1
55 #define BETTER_GREED 0
56 #define LOADS 0
57 #define DIVMODS 0
58 #define NO_INF_LOOPS 0
59 #define NO_INF_LOOPS2 0
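
/* Compile-time switches: LOADS and DIVMODS enable PRE of memory operations
   (non-volatile Loads resp. Div/Mod), BETTER_GREED selects the alternative
   greediness handling in insert_nodes(); the remaining switches are
   documented at their uses further below in this file. */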
60
61 /** Additional info we need for every block. */
62 typedef struct block_info {
63         ir_valueset_t     *exp_gen;    /* contains this block's clean expressions */
64         ir_valueset_t     *avail_out;  /* available values at block end */
65         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
66         ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert_nodes() */
67         ir_valueset_t     *new_set;    /* values newly made available by hoisting */
68         ir_nodehashmap_t  *trans;      /* maps nodes to their translations into this block */
69         ir_node           *avail;      /* saves available node for insert_nodes */
70         int                found;      /* saves kind of availability for insert_nodes */
71         ir_node           *block;      /* block of the block_info */
72         struct block_info *next;       /* links all instances for easy access */
73 } block_info;
74
75 /**
76  * A pair of nodes to be exchanged.
77  * We have to defer the exchange because our hash-sets cannot
78  * find an already replaced node.
79  */
80 typedef struct elim_pair {
81         ir_node *old_node;      /* node that will be replaced */
82         ir_node *new_node;      /* replacement for old_node */
83         struct elim_pair *next; /* links all instances for easy access */
84         int     reason;         /* reason for the replacement */
85 } elim_pair;
86
87 /** environment for the GVN-PRE algorithm */
88 typedef struct pre_env {
89         ir_graph       *graph;        /* current graph */
90         struct obstack *obst;         /* obstack to allocate on */
91         ir_node        *start_block;  /* start block of the current graph */
92         ir_node        *end_block;    /* end block of the current graph */
93         ir_node        *end_node;     /* end node of the current graph */
94         block_info     *list;         /* block_info list head */
95         elim_pair      *pairs;        /* elim_pair list head */
96         ir_nodeset_t   *keeps;        /* set of phis to be removed, needed to kill their keep-alive edges */
97         unsigned        last_idx;     /* last node index of input graph */
98         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
99         char            first_iter;   /* non-zero for first fixed point iteration */
100         char            insert_phase; /* non-zero during the insertion phase */
101 } pre_env;
102
103 static pre_env *environ;
104 /* custom GVN value map */
105 static ir_nodehashmap_t value_map;
106
107 /* debug module handle */
108 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
109
110 #ifdef DEBUG_libfirm
111
112 /* --------------------------------------------------------
113  * Statistics
114  * --------------------------------------------------------
115  */
116
117 typedef struct gvnpre_statistics {
118         int replaced;
119         int partially;
120         int fully;
121         int loads;
122         int divmods;
123         int hoist_high;
124         int first_iter_found;
125         int antic_iterations;
126         int insert_iterations;
127         int infinite_loops;
128 } gvnpre_statistics;
129
130 gvnpre_statistics *gvnpre_stats = NULL;
131
132 static void init_stats(void)
133 {
134         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
135 }
136
137 static void free_stats(void)
138 {
139         free(gvnpre_stats);
140         gvnpre_stats = NULL;
141 }
142
143 static void print_stats(void)
144 {
145         gvnpre_statistics *stats = gvnpre_stats;
146         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
147         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
148         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
149         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
150         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
151         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
152         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
153         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
154         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
155         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
156 }
157
158 #define set_stats(var, value) (var)=(value)
159 #define inc_stats(var)        ((var)+=1)
160
161 /* --------------------------------------------------------
162  * Dump valuesets
163  * --------------------------------------------------------
164  */
165
166 /**
167  * Dump a value set.
168  *
169  * @param set    the set to dump
170  * @param txt    a text to describe the set
171  * @param block  the owner block of the set
172  */
173 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
174 {
175         ir_valueset_iterator_t iter;
176         ir_node *value, *expr;
177         int i;
178
179         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
180         i = 0;
181         foreach_valueset(set, value, expr, iter) {
182                 if ((i & 3) == 3)
183                         DB((dbg, LEVEL_2, "\n"));
184                 if (value != expr)
185                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
186                 else
187                         DB((dbg, LEVEL_2, " %+F,", expr));
188                 ++i;
189         }
190         DB((dbg, LEVEL_2, "\n}\n"));
191 }  /* dump_value_set */
192
193 /**
194  * Dump all exp_gen value sets.
195  *
196  * @param list  the list of block infos to retrieve the sets from
197  */
198 static void dump_all_expgen_sets(block_info *list)
199 {
200         block_info *block_info;
201
202         for (block_info = list; block_info != NULL; block_info = block_info->next) {
203                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
204         }
205 }
206
207 #else
208
209 #define dump_all_expgen_sets(list)
210 #define dump_value_set(set, txt, block)
211
212 #endif /* DEBUG_libfirm */
213
214 /* --------------------------------------------------------
215  * GVN Functions
216  * --------------------------------------------------------
217  */
218
219 /**
220  * Compares node collisions in valuetable.
221  * Modified identities_cmp().
222  */
223 static int compare_gvn_identities(const void *elt, const void *key)
224 {
225         ir_node *a = (ir_node *)elt;
226         ir_node *b = (ir_node *)key;
227         int i, irn_arity_a;
228
229         if (a == b) return 0;
230
231         /* phi nodes kill predecessor values and are always different */
232         if (is_Phi(a) || is_Phi(b))
233                 return 1;
234
235         if ((get_irn_op(a) != get_irn_op(b)) ||
236             (get_irn_mode(a) != get_irn_mode(b))) return 1;
237
238         /* compare if a's in and b's in are of equal length */
239         irn_arity_a = get_irn_arity(a);
240         if (irn_arity_a != get_irn_arity(b))
241                 return 1;
242
243         /* blocks are never the same */
244         if (is_Block(a) || is_Block(b))
245                 return 1;
246
247         /* TODO depends on load optimization */
248         if (get_irn_pinned(a) == op_pin_state_pinned) {
249                 /* for pinned nodes, the block inputs must be equal */
250                 if (get_irn_n(a, -1) != get_irn_n(b, -1))
251                         return 1;
252         } else {
253                 /* we need global values independent from their blocks */
254                 assert(get_opt_global_cse());
255         }
256
257         /* compare a->in[0..ins] with b->in[0..ins] */
258         for (i = 0; i < irn_arity_a; ++i) {
259                 ir_node *pred_a = get_irn_n(a, i);
260                 ir_node *pred_b = get_irn_n(b, i);
261                 if (pred_a != pred_b) {
262                         /* if both predecessors are CSE neutral they might be different */
263                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
264                                 return 1;
265                 }
266         }
267
268         /*
269          * here, we already know that the nodes are identical except for their
270          * attributes
271          */
272         if (a->op->ops.node_cmp_attr)
273                 return a->op->ops.node_cmp_attr(a, b);
274
275         return 0;
276 }
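
/* Note: Phi and Block nodes never compare equal here, and unpinned nodes are
   compared without regard to their block (global CSE). For example, two Add
   nodes in different blocks with the same predecessor nodes are considered
   the same expression, which lets remember() assign them one common value. */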
277
278 /**
279  * Identify does a lookup in the GVN valuetable.
280  * To be used when no new GVN values are to be created.
281  *
282  * @param irn  a node representing an expression
283  * @return a node representing the value
284  */
285 static ir_node *identify(ir_node *irn)
286 {
287         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
288         if (value)
289                 return value;
290         /* irn represents a new value */
291         return identify_remember(irn);
292 }
293
294 /**
295  * remember() adds node irn to the GVN valuetable.
296  * identify_remember() only identifies values of nodes with the
297  * same predecessor nodes (not values). By creating a node from the predecessor
298  * values, a true value tree is built. Phis kill their predecessor value,
299  * so no circular dependencies need to be resolved.
300  *
301  * TODO Improvement:
302  *      Maybe this could be implemented with a custom node hash that takes
303  *      phi nodes and true values (instead of predecessors) into account,
304  *      resulting in value numbers.
305  *
306  * @param irn  a node representing an expression
307  * @return     the value of the expression
308  */
309 static ir_node *remember(ir_node *irn)
310 {
311         int       arity   = get_irn_arity(irn);
312         int       i;
313         int       changed = 0;
314         ir_node **in      = XMALLOCN(ir_node *, arity);
315         ir_node  *value;
316
317         DB((dbg, LEVEL_4, "Remember %+F\n", irn));
318
319         for (i = 0; i < arity; ++i) {
320                 ir_node *pred       = get_irn_n(irn, i);
321                 ir_node *pred_value = identify(pred);
322
323                 /* phi will be translated anyway, so kill the predecessor values
324                    also prevents circular dependencies */
325                 if (is_Phi(pred)) {
326                         /* every phi represents its own value */
327                         in[i] = pred;
328                         continue;
329                 }
330
331                 /* predecessor is not its value representation */
332                 if (pred != pred_value)
333                         changed = 1;
334                 in[i] = pred_value;
335         }
336
337         if (changed) {
338                 /* create a representative node for the new value */
339                 ir_node *nn = new_ir_node(
340                         get_irn_dbg_info(irn),
341                         get_irn_irg(irn),
342                         get_nodes_block(irn),
343                         get_irn_op(irn),
344                         get_irn_mode(irn),
345                         get_irn_arity(irn),
346                         in);
347                 copy_node_attr(environ->graph, irn, nn);
348
349                 /* now the value can be determined */
350                 value = identify_remember(nn);
351         } else {
352                 value = identify_remember(irn);
353         }
354         free(in);
355
356         ir_nodehashmap_insert(&value_map, irn, value);
357
358         return value;
359 }
360
361 /** Once the value map has been built we may look up expressions
362  *  and remember them if they are new.
363  */
364 static ir_node *identify_or_remember(ir_node *irn)
365 {
366         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
367         if (value)
368                 return value;
369         else
370                 return remember(irn);
371 }
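
/* Illustrative example of the resulting value map: for two expressions
       a = x + y;   ...   b = x + y;
   remember() rebuilds each Add over the values of its operands, so both
   Add nodes are mapped to one common value node and identify(a) equals
   identify(b). Phi nodes always represent their own value, which keeps
   the value map free of cycles. */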
372
373 /* --------------------------------------------------------
374  * Block info
375  * --------------------------------------------------------
376  */
377
378 /**
379  * Allocate block info for block block.
380  *
381  * @param block   the block
382  * @param env     the environment
383  */
384 static void alloc_block_info(ir_node *block, pre_env *env)
385 {
386         block_info *info = OALLOC(env->obst, block_info);
387
388         set_irn_link(block, info);
389         info->exp_gen    = ir_valueset_new(16);
390         info->avail_out  = ir_valueset_new(16);
391         info->antic_in   = ir_valueset_new(16);
392         info->antic_done = ir_valueset_new(16);
393         info->trans = XMALLOC(ir_nodehashmap_t);
394         ir_nodehashmap_init(info->trans);
395
396         info->new_set = NULL;
397         info->avail   = NULL;
398         info->block   = block;
399         info->found   = 1;
400
401         info->next = env->list;
402         env->list  = info;
403 }  /* alloc_block_info */
404
405 static void free_block_info(block_info *block_info)
406 {
407         ir_valueset_del(block_info->exp_gen);
408         ir_valueset_del(block_info->avail_out);
409         ir_valueset_del(block_info->antic_in);
410         if (block_info->trans) {
411                 ir_nodehashmap_destroy(block_info->trans);
412                 free(block_info->trans);
413         }
414         if (block_info->new_set)
415                 ir_valueset_del(block_info->new_set);
416 }
417
418 /**
419  * Bottom up walker that ensures that every block gets a block info.
420  *
421  * @param irn   the node
422  * @param ctx   the environment
423  */
424 static void block_info_walker(ir_node *irn, void *ctx)
425 {
426         if (is_Block(irn)) {
427                 pre_env *env = (pre_env*)ctx;
428                 alloc_block_info(irn, env);
429         }
430 }
431
432 /**
433  * Returns the block info of a block.
434  */
435 static block_info *get_block_info(ir_node *block)
436 {
437         return (block_info*)get_irn_link(block);
438 }
439
440 /* --------------------------------------------------------
441  * Infinite loop analysis
442  * --------------------------------------------------------
443  */
444
445 /**
446  * Walker to set block marks and loop links to 0.
447  */
448 static void clear_block_mark_loop_link(ir_node *block, void *env)
449 {
450         (void) env;
451
452         if (is_Block(block)) {
453                 set_Block_mark(block, 0);
454                 set_loop_link(get_irn_loop(block), NULL);
455         }
456 }
457
458 /**
459  * Returns non-zero if block is part of the real loop loop.
460  */
461
462 static unsigned in_loop(ir_node *block, ir_loop *loop)
463 {
464         ir_loop *l     = get_irn_loop(block);
465         ir_loop *outer = get_irg_loop(environ->graph);
466
467         while (l != loop) {
468                 /* loop tree root is not a loop */
469                 if (l == NULL || l == outer)
470                         return 0;
471                 l = get_loop_outer_loop(l);
472         }
473         return 1;
474 }
475
476 /**
477  * Returns the outermost real loop of loop.
478  */
479 static ir_loop *get_loop_outermost(ir_loop *loop)
480 {
481         ir_loop *outer = get_irg_loop(environ->graph);
482         ir_loop *l     = loop;
483         ir_loop *last  = NULL;
484
485         while (l != outer) {
486                 last = l;
487                 l = get_loop_outer_loop(l);
488         }
489         return last;
490 }
491
492 /**
493  * Topological bottom-up walker; sets the loop links of infinite loops to non-zero.
494  * Block marks are used to flag blocks that are reachable from the end block;
495  * they are also set if a block is not part of an infinite loop.
496  */
497 static void infinite_loop_walker(ir_node *block, void *env)
498 {
499         int arity;
500         int i;
501         (void) env;
502
503         if (! is_Block(block))
504                 return;
505
506         /* start block has no predecessors */
507         if (block == environ->start_block)
508                 return;
509
510         arity = get_irn_arity(block);
511
512         /* block not part of a real loop: no infinite loop */
513         if (get_irn_loop(block) == get_irg_loop(environ->graph))
514                 set_Block_mark(block, 1);
515
516         if (get_Block_mark(block)) {
517                 /* reachable block: mark all cf predecessors */
518                 for (i = 0; i < arity; ++i) {
519                         ir_node *pred = get_Block_cfgpred_block(block, i);
520                         if (is_Bad(pred))
521                                 continue;
522                         set_Block_mark(pred, 1);
523                 }
524         } else {
525                 /* We are in a real loop and see an unreachable block. */
526                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
527
528                 /* flag loop as infinite */
529                 set_loop_link(outermost_loop, outermost_loop);
530                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
531
532                 /* The cf predecessors are unreachable, but can never be part of
533                    an infinite loop, because we just reached them. So we set the
534                    blockmark to prevent triggering the infinite loop detection. */
535
536                 /* passing information to the cf predecessors */
537                 for (i = 0; i < arity; ++i) {
538                         ir_node *pred = get_Block_cfgpred_block(block, i);
539
540                         if (is_Bad(pred))
541                                 continue;
542
543                         /* If our cf predecessor is in the same endless loop,
544                            it is also unreachable. */
545                         if (in_loop(pred, outermost_loop)) {
546                                 set_Block_mark(pred, 0);
547                         } else {
548                                 /* When we leave the unreachable loop, we artificially
549                                    declare the cf predecessor reachable. */
550                                 set_Block_mark(pred, 1);
551                         }
552                 }
553         }
554 }
555
556 /**
557  * Sets loop links of outermost infinite loops to non-zero.
558  */
559 static void analyse_loops(ir_graph *irg)
560 {
561         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
562
563         /* reset block mark and loop links */
564         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
565
566         /* mark end block reachable */
567         set_Block_mark(get_irg_end_block(irg), 1);
568         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
569
570         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
571 }
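
/* After analyse_loops(), is_in_infinite_loop() reports a block as part of an
   infinite loop whenever the loop link of its outermost real loop has been
   set, i.e. whenever some block of that loop could not be reached backwards
   from the end block. */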
572
573 #if NO_INF_LOOPS || NO_INF_LOOPS2
574 /**
575  * Returns non-zero if block is part of an infinite loop.
576  */
577 static unsigned is_in_infinite_loop(ir_node *block)
578 {
579         ir_loop *loop;
580
581         assert(is_Block(block));
582         loop = get_irn_loop(block);
583         assert(loop);
584
585         loop = get_loop_outermost(loop);
586         if (loop)
587                 return (get_loop_link(loop) != NULL);
588         else
589                 return 0;
590 }
591 #endif
592
593 /* --------------------------------------------------------
594  * GVN-PRE Exp_gen
595  * --------------------------------------------------------
596  */
597
598 /**
599  * Returns non-zero if a node is movable and a possible candidate for PRE.
600  */
601 static unsigned is_nice_value(ir_node *n)
602 {
603         ir_mode *mode = get_irn_mode(n);
604
605         /* positive group */
606
607         if (is_Phi(n))
608                 return 1;
609
610 #if LOADS || DIVMODS
611         if (is_Proj(n))
612                 return 1;
613 #endif
614
615 #if LOADS
616         if (is_Load(n))
617                 return (get_Load_volatility(n) == volatility_non_volatile);
618 #endif
619
620         /* negative group */
621
622         if (get_irn_pinned(n) == op_pin_state_pinned)
623                 return 0;
624
625         if (! mode_is_data(mode)) {
626                 if (! is_Div(n) && ! is_Mod(n))
627                         return 0;
628         }
629         return 1;
630 }
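
/* Examples: a Phi is always a candidate; a pinned node never is; Div and
   Mod are accepted although their mode is not a data mode; with LOADS
   enabled, non-volatile Loads (and Projs) become candidates as well. */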
631
632 /**
633  * Checks if a node n is clean in block block for exp_gen.
634  *
635  * @param n      the node
636  * @param block  the block
637  * @return non-zero value for clean node
638  */
639 static unsigned is_clean_in_block_expgen(ir_node *n, ir_node *block)
640 {
641         int i, arity;
642
643         if (is_Phi(n))
644                 return 1;
645
646         if (! is_nice_value(n))
647                 return 0;
648
649         arity = get_irn_arity(n);
650         for (i = 0; i < arity; ++i) {
651                 ir_node *pred = get_irn_n(n, i);
652
653                 /* sufficient for exp_gen because block is always node's block */
654                 if (get_nodes_block(pred) != block)
655                         continue;
656
657                 /* predecessor is in block,
658                    so it needs to be clean */
659                 if (get_irn_link(pred)) {
660                         continue;
661                 } else {
662                         DB((dbg, LEVEL_3, "unclean %+F because pred %+F unclean\n", n, pred));
663                         return 0;
664                 }
665         }
666         return 1;
667 }
668
669 /**
670  * Topological walker that puts nodes into the exp_gen set of their block.
671  * Assumed to walk blockwise and nodewise in topological top-down order.
672  *
673  * @param irn    the node
674  * @param ctx    the environment
675  */
676 static void topo_walker(ir_node *irn, void *ctx)
677 {
678         ir_node    *block;
679         block_info *info;
680         ir_node    *value;
681         (void) ctx;
682
683         if (is_Block(irn))
684                 return;
685
686         /* GVN step: remember the value. Predecessors need to be visited. */
687         value = remember(irn);
688
689         block = get_nodes_block(irn);
690         info  = get_block_info(block);
691
692         /* values for avail_out do not need to be clean */
693         ir_valueset_insert(info->avail_out, value, irn);
694
695         /* no need to put constants into the sets: they are always redundant */
696         if (! is_nice_value(irn) || is_irn_constlike(irn))
697                 return;
698
699         if (is_clean_in_block_expgen(irn, block)) {
700                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
701
702                 ir_valueset_insert(info->exp_gen, value, irn);
703                 /* flag irn as clean */
704                 set_irn_link(irn, irn);
705         } else {
706                 /* flag irn as not clean */
707                 set_irn_link(irn, NULL);
708         }
709 }
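
/* After this walk every block info holds, for a block that computes e.g.
   t = a + b, the mapping value(t) -> t in avail_out (constants included),
   while exp_gen only receives t if it is a candidate (see is_nice_value())
   and clean, i.e. all of its operands living in the same block are clean
   themselves; constants are never put into exp_gen. */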
710
711 /* --------------------------------------------------------
712  * GVN-PRE Antic_in
713  * --------------------------------------------------------
714  */
715
716 /**
717  * Gets the result of the phi translation of node node into block block.
718  *
719  * @param block  the target block
720  * @param node   the node
721  *
722  * @return the phi translation of node into block, or NULL if there is none
723  */
724 static ir_node *get_translated(ir_node *block, ir_node *node)
725 {
726         if (is_irn_constlike(node))
727                 return node;
728
729         return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
730 }
731
732 /**
733  * Saves the result of the phi translation of node node into the
734  * translation map of the target block.
735  * Constant-like nodes are not stored: they are their own translation.
736  *
737  * @param map    the translation map of the target block
738  * @param node   the node
739  * @param trans  the translation result
740  *
741  */
742 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
743 {
744         if (is_irn_constlike(node))
745                 return;
746         /* insert or replace */
747         ir_nodehashmap_insert(map, node, trans);
748 }
749
750 /**
751  * Checks if node is hoistable into block.
752  *
753  * A clean node in block block can be hoisted above block succ.
754  * A node is not clean if its representative is killed in block block.
755  * The node can still be hoisted into block block.
756  *
757  * @param node   the node to be checked
758  * @param block  the block to hoist into; succ is the successor it comes from
759  * @returns      block if node can be hoisted above block, succ if it can only be hoisted into block, or NULL
760  */
761 static ir_node *is_hoistable_above(ir_node *node, ir_node *block, ir_node *succ)
762 {
763         int i;
764         int arity = get_irn_arity(node);
765
766         /* make sure that node can be hoisted above block */
767
768         if (is_irn_constlike(node))
769                 return block;
770
771         for (i = 0; i < arity; ++i) {
772                 block_info *info       = get_block_info(block);
773                 ir_node    *pred       = get_irn_n(node, i);
774                 ir_node    *pred_value = identify(pred);
775                 ir_node    *pred_block = get_nodes_block(pred);
776
777                 /* remove TMP_GEN */
778                 /* is predecessor not known to be clean */
779                 if (! ir_valueset_lookup(info->antic_in, pred_value)) {
780                         /* is it possible to hoist node above block? */
781                         if (! block_strictly_dominates(pred_block, block)) {
782                                 DB((dbg, LEVEL_3, "unclean %+F: %+F (%+F) not antic\n", node, pred, pred_value));
783                                 return succ;
784                         }
785                 }
786
787                 /* the predecessor value being antic_in is not enough:
788                    if we did not translate the exact representative we cannot translate. */
789                 if (! get_translated(pred_block, pred)) {
790                         if (! block_strictly_dominates(pred_block, block)) {
791                                 DB((dbg, LEVEL_3, "unclean %+F: %+F (%+F) lost representative\n", node, pred, pred_value));
792                                 return NULL;
793                         }
794                 }
795         }
796         return block;
797 }
798
799 #if LOADS || DIVMODS
800 /* Helper function to compare the values of pred and avail_pred. */
801 static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
802 {
803         ir_node *avail_value  = identify(avail_pred);
804         ir_node *trans_pred   = get_translated_pred(pred, block, pos);
805         ir_node *value;
806
807         if (trans_pred == NULL)
808                 trans_pred = pred;
809         value = identify(trans_pred);
810
811         DB((dbg, LEVEL_3, "manual compare %+F  %+F\n", pred, avail_pred));
812
813         return (value == avail_value);
814 }
815 #endif
816
817 #if LOADS
818 /**
819  * Does phi translation for redundant load nodes only.
820  * Returns NULL for non-redundant loads, which need to be phi translated.
821  * Loads are compared by comparing their pointer values,
822  * and ensuring that they are adjacent.
823  * This is equivalent to what phi_translation does,
824  * when a new node is created and then GCSE optimized resulting
825  * in an already available node.
826  */
827 static ir_node *phi_translate_load(ir_node *load, ir_node *block, int pos)
828 {
829         ir_node *mem   = get_Load_mem(load);
830         ir_node *trans = get_translated_pred(mem, block, pos);
831
832         if (trans == NULL)
833                 trans = mem;
834
835         /* no partial redundancy if this is a mode_M phi */
836         if (is_Proj(trans)) {
837                 /* The last memory operation in predecessor block */
838                 ir_node *avail_load = get_Proj_pred(trans);
839
840                 /* memop is a load with matching type */
841                 if (is_Load(avail_load) &&
842                             get_Load_mode(load) == get_Load_mode(avail_load)) {
843
844                         unsigned match = match_pred(get_Load_ptr(load), get_Load_ptr(avail_load), block, pos);
845
846                         if (match)
847                                 return avail_load;
848                 }
849         }
850         return NULL;
851 }
852 #endif
853
854 #if DIVMODS
855 /**
856  * Does phi translation for redundant Div/Mod nodes only.
857  * Returns NULL for non-redundant node, which needs to be phi translated.
858  */
859 static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
860 {
861         ir_node *mem   = get_memop_mem(divmod);
862         ir_node *trans = get_translated_pred(mem, block, pos);
863
864         if (trans == NULL)
865                 trans = mem;
866
867         /* no partial redundancy if this is a mode_M phi */
868         if (is_Proj(trans)) {
869                 /* The last memory operation in predecessor block */
870                 ir_node *avail_op = get_Proj_pred(trans);
871
872                 if (get_irn_op(divmod) == get_irn_op(avail_op)) {
873                         unsigned left, right;
874
875                         if (is_Div(avail_op)) {
876                                 if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
877                                     get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
878
879                                         left  = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
880                                         right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
881
882                                         if (left && right)
883                                                 return avail_op;
884                                 }
885                         } else if (is_Mod(avail_op)) {
886                             if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
887
888                                         left  = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
889                                         right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
890
891                                         if (left && right)
892                                                 return avail_op;
893                                 }
894                         }
895                 }
896         }
897         return NULL;
898 }
899 #endif
900
901 /**
902  * Translates an expression above a Phi.
903  *
904  * @param node        the node
905  * @param block       the block the node is translated into
906  * @param pos         the input number of the destination block
907  * @param pred_block  the cf predecessor block of block at position pos
908  * @return a node representing the translated value
909  */
910 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_node *pred_block)
911 {
912         int       i;
913         int       arity;
914         ir_node **in;
915         ir_node  *nn;
916         int       needed;
917
918         if (is_Phi(node)) {
919                 if (get_nodes_block(node) == block)
920                         return get_Phi_pred(node, pos);
921                 /* this phi does not need translation */
922                 return node;
923         }
924
925         arity = get_irn_arity(node);
926
927 #if LOADS
928         if (is_Load(node)) {
929                 ir_node *avail_load = phi_translate_load(node, block, pos);
930                 if (avail_load)
931                         return avail_load;
932         }
933 #endif
934
935 #if DIVMODS
936         if (is_Div(node) || is_Mod(node)) {
937                 ir_node *avail_op = phi_translate_divmod(node, block, pos);
938                 if (avail_op)
939                         return avail_op;
940         }
941 #endif
942
943         needed = 0;
944         /* insert phase enforces translation for previously not translated nodes */
945         if (environ->insert_phase)
946                 needed = 1;
947
948         in = XMALLOCN(ir_node *, arity);
949
950         for (i = 0; i < arity; ++i) {
951                 ir_node *pred       = get_irn_n(node, i);
952                 /* we cannot find this value in antic_in, because the value
953                    has (possibly) changed! */
954                 ir_node *pred_trans = get_translated(pred_block, pred);
955                 ir_node *expr;
956
957                 if (pred_trans == NULL) {
958                         /* reasons for this are:
959                            1. pred not dominated by block: use predecessor.
960                            2. dangling representative: predecessor not translated.
961                            We cannot phi translate; it would be the wrong value. */
962                         expr = pred;
963                 } else {
964                         expr = pred_trans;
965                         /* predecessor value changed, so translation is needed */
966                         if (identify(expr) != identify(pred))
967                                 needed |= 1;
968                 }
969
970                 /* We need to build a value tree. But values cannot be translated,
971                    expressions can be. So we translate the expressions and let GVN
972                    identify their values. */
973                 in[i] = expr;
974         }
975
976         if (! needed)
977                 return node;
978
979         DB((dbg, LEVEL_3, "Translate\n"));
980         /* copy node to represent the new value.
981            We do not translate nodes that do not need translation,
982            so we use the newly created nodes as value representatives only.
983            Their block is not important, because we create new ones during
984            insert_nodes(). */
985         nn = new_ir_node(
986                 get_irn_dbg_info(node),
987                 environ->graph,
988                 get_Block_cfgpred_block(block, pos),
989                 get_irn_op(node),
990                 get_irn_mode(node),
991                 arity,
992                 in);
993         free(in);
994         /* We need the attribute copy here, because the Hash value of a
995            node might depend on it. */
996         copy_node_attr(environ->graph, node, nn);
997         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
998            because it already uses availability. */
999
1000         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
1001         return nn;
1002 }
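
/* Illustrative example: let block contain the Phi  x = Phi(x0, x1)  and let
   the expression  x + y  be anticipated there. Translating it towards the
   predecessor at pos 0 conceptually yields  x0 + y : the Phi operand is
   replaced by its value at that predecessor and a new node is built over
   the translated operands, to be value-numbered by the caller. */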
1003
1004 /**
1005  * Block-walker, computes Antic_in(block).
1006  * Builds a value tree out of the graph by translating values
1007  * over phi nodes.
1008  * Although the algorithm works on values, constructing the value tree
1009  * depends on actual representations through nodes and their actual
1010  * predecessors.
1011  * By using only one representative (GVN-PRE) for every value, we have to be
1012  * careful not to break the topological order during translation. If a node
1013  * needs to be translated, but its predecessor - a representative in the same
1014  * antic_in scope - has not been, then it has to be killed.
1015  * one-representative-problem: Two blocks yield the same value through
1016  *    completely different calculations. The value is common, but the
1017  *    representative cannot be phi translated, because the predecessors
1018  *    haven't been.
1019  *    If a value is in exp_gen and also in antic_in of the successor,
1020  *    a similar situation arises.
1021  *
1022  * By using antic_in exclusively, we lose information about translated nodes.
1023  * We need to permanently keep the translated nodes list.
1024  * For insert_nodes() we actually need antic_out. But antic_out is not usable,
1025  * because every value would have to be cross-looked up in every availability set.
1026  *
1027  * If we used antic_out, the translated nodes list would not be necessary
1028  * permanently, because instead of looking for translated(antic_in) we could
1029  * just use antic_out of the predecessor block.
1030  *
1031  * @param block  the block
1032  * @param ctx    the walker environment
1033  */
1034 static void compute_antic(ir_node *block, void *ctx)
1035 {
1036         pre_env                *env       = (pre_env*)ctx;
1037         block_info             *succ_info;
1038         block_info             *info;
1039         ir_node                *succ;
1040         ir_node                *value;
1041         ir_node                *expr;
1042         size_t                  size;
1043         ir_valueset_iterator_t  iter;
1044         int                     n_succ;
1045
1046         /* filter blocks from topological walker */
1047         if (! is_Block(block))
1048                 return;
1049
1050         /* the end block has no successor */
1051         if (block == env->end_block)
1052                 return;
1053
1054         info = get_block_info(block);
1055         /* track changes */
1056         size = ir_valueset_size(info->antic_in);
1057         n_succ = get_Block_n_cfg_outs(block);
1058
1059         if (env->first_iter) {
1060                 foreach_valueset(info->exp_gen, value, expr, iter) {
1061                         ir_valueset_insert(info->antic_in, value, expr);
1062                 }
1063         }
1064
1065         /* successor might have phi nodes */
1066         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
1067                 succ      = get_Block_cfg_out(block, 0);
1068                 int pos   = get_Block_cfgpred_pos(succ, block);
1069                 succ_info = get_block_info(succ);
1070
1071                 /* initialize translated set */
1072                 if (env->first_iter) {
1073                         info->trans = XMALLOC(ir_nodehashmap_t);
1074                         ir_nodehashmap_init(info->trans);
1075                 }
1076
1077                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
1078                         /* we translate the expression over the phi node,
1079                            remember() builds the value tree */
1080                         ir_node *trans       = phi_translate(expr, succ, pos, block);
1081                         /* create new value if necessary */
1082                         ir_node *trans_value = identify_or_remember(trans);
1083                         ir_node *represent;
1084                         ir_node *hoistable;
1085
1086                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1087
1088                         /* on value change (phi present) we need the translated node
1089                            to represent the new value for possible further translation. */
1090                         if (value != trans_value)
1091                                 represent = trans;
1092                         else
1093                                 represent = expr;
1094
1095                         hoistable = is_hoistable_above(expr, block, succ);
1096                         //if (is_clean_in_block_antic(expr, block)) {
1097                         if (hoistable != NULL) {
1098                                 /* Where is the difference between replace and insert?
1099                                    If we replace, we might use a representative with non-translated predecessors. */
1100                                 ir_valueset_insert(info->antic_in, trans_value, represent);
1101                                 DB((dbg, LEVEL_3, "Translated %+F repr %+F\n", expr, represent));
1102
1103                                 if (hoistable == block)
1104                                         ir_valueset_set_link(info->antic_in, trans_value, trans_value);
1105                                 else
1106                                         /* hoistable into block but not above */
1107                                         ir_valueset_set_link(info->antic_in, trans_value, NULL);
1108                         }
1109                         /* during insert we use the translated node, because it may be
1110                            hoisted into block while not being anticipated there. */
1111                         set_translated(info->trans, expr, represent);
1112                 }
1113         } else if (n_succ > 1) {
1114                 int         i;
1115                 ir_node    *common     = NULL;
1116                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1117                 block_info *succ0_info = get_block_info(succ0);
1118
1119                 /* intersection of the successors' antic_in sets */
1120                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1121                         ir_node *hoistable;
1122
1123                         /* iterate over remaining successors */
1124                         for (i = 1; i < n_succ; ++i) {
1125                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1126                                 block_info *succ_info = get_block_info(succ);
1127
1128                                 /* value in antic_in? */
1129                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1130                                 if (common == NULL)
1131                                         break;
1132                         }
1133
1134                         hoistable = is_hoistable_above(expr, block, succ0);
1135
1136                         if (common && hoistable != NULL) {
1137                                 ir_valueset_insert(info->antic_in, value, expr);
1138
1139                                 if (hoistable == block)
1140                                         ir_valueset_set_link(info->antic_in, value, value);
1141                                 else
1142                                         /* hoistable into block but not above */
1143                                         ir_valueset_set_link(info->antic_in, value, NULL);
1144
1145                                 DB((dbg, LEVEL_3, "common and clean %+F(%+F) in %+F\n", expr, value, block));
1146                         }
1147                 }
1148         }
1149
1150         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1151
1152         if (size != ir_valueset_size(info->antic_in))
1153                 env->changes |= 1;
1154 }
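
/* Informal summary of the fixed point computed above (\/ = union,
   /\ = intersection over all successors S of B):
      Antic_in(B) = hoistable( phi_translate(Antic_in(S)) \/ Exp_gen(B) )   for a single successor with Phis,
      Antic_in(B) = hoistable( (/\ Antic_in(S)) \/ Exp_gen(B) )             for several successors,
   iterated until no antic_in set changes any more (bounded by MAX_ANTIC_ITER). */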
1155
1156 /* --------------------------------------------------------
1157  * Main algorithm Avail_out
1158  * --------------------------------------------------------
1159  */
1160
1161 /**
1162  * Computes Avail_out(block):
1163  *
1164  * Avail_in(block)  = Avail_out(dom(block))
1165  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1166  *
1167  * Precondition:
1168  *  This function must be called in the top-down topological order:
1169  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1170  *
1171  * @param block   the block
1172  * @param ctx     walker context
1173  */
1174 static void compute_avail_top_down(ir_node *block, void *ctx)
1175 {
1176         pre_env    *env   = (pre_env*)ctx;
1177         block_info *info;
1178
1179         if (block == env->end_block)
1180                 return;
1181
1182         info  = get_block_info(block);
1183
1184         /* Add all nodes from the immediate dominator.
1185            This ensures that avail_out contains the leader. */
1186         if (block != env->start_block) {
1187                 ir_node                *dom_block = get_Block_idom(block);
1188                 block_info             *dom_info  = get_block_info(dom_block);
1189                 ir_node                *value;
1190                 ir_node                *expr;
1191                 ir_valueset_iterator_t  iter;
1192
1193                 foreach_valueset(dom_info->avail_out, value, expr, iter) {
1194                         /* use first available expr as leader */
1195                         ir_valueset_replace(info->avail_out, value, expr);
1196                 }
1197         }
1198
1199         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1200 }
1201
1202 /* --------------------------------------------------------
1203  * Main algorithm redundancy detection
1204  * --------------------------------------------------------
1205  */
1206
1207 /**
1208  * Flags node irn redundant depending on redundant parameter.
1209  */
1210 static void flag_redundant(ir_node *irn, unsigned redundant)
1211 {
1212         if (redundant) {
1213                 set_irn_link(irn, irn);
1214         } else {
1215                 set_irn_link(irn, NULL);
1216         }
1217 }
1218
1219 /**
1220  * Returns redundant flag of node irn.
1221  */
1222 static unsigned is_redundant(ir_node *irn)
1223 {
1224         return (get_irn_link(irn) != NULL);
1225 }
1226
1227 /**
1228  * Returns a valid mode if the value of expr is a partially redundant value.
1229  *
1230  * @param block   the block
1231  * @param expr    the expression
1232  * @param value   the GVN value of expr
1233  * @return mode of the expression if it is partially redundant, else NULL
1234  */
1235 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1236 {
1237         ir_node *first_avail         = NULL;
1238         int      pos;
1239         int      arity               = get_irn_arity(block);
1240         int      fully_redundant     = 1;
1241         int      partially_redundant = 0;
1242         ir_mode *mode                = NULL;
1243
1244         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1245
1246         /* for each predecessor blocks */
1247         for (pos = 0; pos < arity; ++pos) {
1248                 block_info *pred_info;
1249                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1250                 ir_node    *trans_expr;
1251                 ir_node    *trans_value;
1252                 ir_node    *avail_expr;
1253
1254                 /* ignore bad blocks. */
1255                 if (is_Bad(pred_block))
1256                         continue;
1257
1258                 pred_info  = get_block_info(pred_block);
1259                 trans_expr = get_translated(pred_block, expr);
1260                 trans_value = identify(trans_expr);
1261
1262                 avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1263                 /* value might be available through a not yet existing constant */
1264                 if (avail_expr == NULL && is_irn_constlike(trans_expr)) {
1265                         /* limit range of new constants */
1266                         ir_mode   *cmode  = get_irn_mode(trans_expr);
1267                         ir_tarval *upper = new_tarval_from_long(127, cmode);
1268                         ir_tarval *lower = new_tarval_from_long(-127, cmode);
1269                         ir_tarval *c     = get_Const_tarval(trans_expr);
1270
1271                         if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
1272                                 tarval_cmp(c, upper) != ir_relation_greater_equal) {
1273                                 avail_expr = NULL;
1274                         } else {
1275                                 avail_expr = trans_expr;
1276                         }
1277                 }
1278
1279                 DB((dbg, LEVEL_3, "avail_expr %+F\n", avail_expr));
1280
1281                 if (avail_expr == NULL) {
1282                         pred_info->avail = trans_expr;
1283                         pred_info->found = 0;
1284                         fully_redundant  = 0;
1285                 } else {
1286                         /* expr is available */
1287                         pred_info->avail    = avail_expr;
1288                         pred_info->found    = 1;
1289                         mode                = get_irn_mode(avail_expr);
1290                         partially_redundant = 1;
1291
1292                         if (first_avail == NULL)
1293                                 first_avail = avail_expr;
1294                         else if (first_avail != avail_expr)
1295                                 /* Multiple different expressions are available */
1296                                 fully_redundant = 0;
1297
1298                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1299                 }
1300         }
1301
1302 #if BETTER_GREED
1303         /* value is redundant from last iteration,
1304            but has not been removed from antic_in (is not optimized) */
1305         if (! environ->first_iter && is_redundant(expr))
1306                 return mode;
1307 #endif
1308
1309         /* If it is not the same value already existing along every predecessor
1310            and it is defined by some predecessor then it is partially redundant. */
1311         if (! partially_redundant || fully_redundant)
1312                 return NULL;
1313         return mode;
1314 }
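
/* Illustrative example: let block B have the predecessors P1 and P2 and let
   the value of  a + b  be in avail_out(P1) but not in avail_out(P2). Then
   a + b  is partially redundant in B: the per-predecessor block infos record
   what is available where (found/avail), the mode of the expression is
   returned, and insert_nodes() can later place a copy in P2 and merge the
   results with a new Phi in B. */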
1315
1316 /**
1317  * Updates the new_set of a block by adding the new_set of
1318  * the immediate dominating block.
1319  *
1320  * @param block  the block whose sets are updated from its immediate dominator idom
1321  */
1322 static void update_new_set(ir_node *block, ir_node *idom)
1323 {
1324         ir_node                *value;
1325         ir_node                *expr;
1326         ir_valueset_iterator_t  iter;
1327         block_info             *curr_info = get_block_info(block);
1328         block_info             *idom_info = get_block_info(idom);
1329         int                     updated   = 0;
1330
1331         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1332         foreach_valueset(idom_info->new_set, value, expr, iter) {
1333                 /* inherit new_set from immediate dominator */
1334                 ir_valueset_insert(curr_info->new_set, value, expr);
1335                 /* replace in avail_out */
1336                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1337         }
1338 #ifdef DEBUG_libfirm
1339         if (updated)
1340                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1341 #endif
1342 } /* update_new_set */
1343
1344 /**
1345  * Checks if hoisting irn is greedy.
1346  * Greedy hoisting means that nodes which are not partially redundant
1347  * get hoisted as well. This happens if a partially redundant node has
1348  * non-redundant predecessors.
1349  */
1350 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1351 {
1352         int arity = get_irn_arity(irn);
1353         int i;
1354
1355         for (i = 0; i < arity; ++i) {
1356                 ir_node *pred  = get_irn_n(irn, i);
1357
1358                 if (! block_strictly_dominates(get_nodes_block(pred), block) && ! is_redundant(pred))
1359                         return 1;
1360         }
1361         return 0;
1362 }
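
/* Example: hoisting  x + y  above block B is greedy if the definition of y
   does not strictly dominate B and y is not itself redundant, because y
   would then have to be hoisted along although nothing is saved for it. */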
1363
1364 /**
1365  * Perform insertion of partially redundant values.
1366  * For every block node, do the following:
1367  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1368  * If the block has multiple predecessors,
1369  *     2a. Iterate over the ANTIC expressions for the block to see if
1370  *         any of them are partially redundant.
1371  *     2b. If so, insert them into the necessary predecessors to make
1372  *         the expression fully redundant.
1373  *     2c. Insert a new Phi merging the values of the predecessors.
1374  *     2d. Insert the new Phi, and the new expressions, into the
1375  *         NEW_SETS set.
1376  *
1377  * @param block  the block
1378  * @param ctx    the walker environment
1379  */
1380 static void insert_nodes(ir_node *block, void *ctx)
1381 {
1382         pre_env                *env    = (pre_env*)ctx;
1383         int                     arity  = get_irn_arity(block);
1384         ir_node                *value;
1385         ir_node                *expr;
1386         block_info             *info;
1387         ir_node                *idom;
1388         int                     pos;
1389         ir_valueset_iterator_t  iter;
1390
1391         /* only blocks */
1392         if (! is_Block(block))
1393                 return;
1394
1395         /* ensure that even the start block has a new_set */
1396         info = get_block_info(block);
1397         if (info->new_set)
1398                 ir_valueset_del(info->new_set);
1399         info->new_set = ir_valueset_new(16);
1400
1401         if (block == env->start_block)
1402                 return;
1403
1404         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1405
1406         idom = get_Block_idom(block);
1407         update_new_set(block, idom);
1408
1409         /* process only path joining blocks */
1410         if (arity < 2) {
1411                 return;
1412         }
1413
1414 #if BETTER_GREED
1415         plist_t *stack = plist_new();
1416 #endif
1417
1418         /* This is the main reason we choose to use antic_in over antic_out;
1419            we may iterate over every anticipated value first and not
1420            over the predecessor blocks. */
1421         foreach_valueset(info->antic_in, value, expr, iter) {
1422                 ir_mode  *mode;
1423                 ir_node  *phi;
1424                 ir_node **phi_in;
1425
1426                 /* already done? */
1427                 if (ir_valueset_lookup(info->antic_done, value))
1428                         continue;
1429
1430                 /* hoistable above block? */
1431                 if (ir_valueset_get_link(info->antic_in, value) == NULL)
1432                         continue;
1433
1434 #if BETTER_GREED
1435                 plist_insert_front(stack, expr);
1436 #endif
1437
1438                 /* filter phi nodes from antic_in */
1439                 if (is_Phi(expr)) {
1440                         flag_redundant(expr, 1);
1441                         continue;
1442                 }
1443
1444                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1445
1446                 /* A value computed in the dominator is totally redundant.
1447                    Hence we have nothing to insert. */
1448                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1449                         flag_redundant(expr, 1);
1450                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1451                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1452
1453                         continue;
1454                 }
1455
1456 #if !BETTER_GREED
1457                 if (is_hoisting_greedy(expr, block))
1458                         continue;
1459 #endif
1460
1461                 mode = is_partially_redundant(block, expr, value);
1462                 if (mode == NULL) {
1463                         flag_redundant(expr, 0);
1464                         continue;
1465                 } else {
1466                         flag_redundant(expr, 1);
1467                 }
1468
1469 #if BETTER_GREED
1470                 if (is_hoisting_greedy(expr, block))
1471                         continue;
1472 #endif
1473
1474 #if LOADS || DIVMODS
1475                 /* save old mode_M phis to remove keepalive edges later */
1476                 if (is_memop(expr)) {
1477                         ir_node *mem = get_memop_mem(expr);
1478                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1479                                 ir_nodeset_insert(env->keeps, mem);
1480                         }
1481                 }
1482 #endif
1483
1484 #ifdef DEBUG_libfirm
1485                 if (! is_Proj(expr)) {
1486                         if (env->first_iter)
1487                                 inc_stats(gvnpre_stats->first_iter_found);
1488                         inc_stats(gvnpre_stats->partially);
1489                 }
1490                 if (is_Load(expr))
1491                         inc_stats(gvnpre_stats->loads);
1492                 else if (is_Div(expr) || is_Mod(expr))
1493                         inc_stats(gvnpre_stats->divmods);
1494 #endif
1495
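                /* collect one operand per predecessor for the Phi that will
                   merge the value in this join block */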
1496                 phi_in = XMALLOCN(ir_node *, arity);
1497
1498                 /* for each predecessor block */
1499                 for (pos = 0; pos < arity; ++pos) {
1500                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1501                         block_info *pred_info;
1502
1503                         /* ignore bad blocks. */
1504                         if (is_Bad(pred_block)) {
1505                                 ir_graph *irg = get_irn_irg(pred_block);
1506                                 phi_in[pos] = new_r_Bad(irg, mode);
1507                                 continue;
1508                         }
1509                         pred_info = get_block_info(pred_block);
1510
1511                         if (! pred_info->found) {
1512                                 ir_node *trans = get_translated(pred_block, expr);
1513
1514                                 assert(trans);
1515                                 if (trans == expr) {
1516                         /* The expression has only been phi-translated if an
1517                            ancestor was a Phi; non-Phi descendants can be redundant
1518                            as well but have not been translated yet. */
1519                                         trans = phi_translate(expr, block, pos, pred_block);
1520                                         set_translated(pred_info->trans, expr, trans);
1521                                 }
1522
1523                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F not available\n", trans, pred_block, expr));
1524
1525                 /* the value is now available in the predecessor block through
1526                    trans; insert (not replace) because it was not available before */
1527                                 ir_valueset_insert(pred_info->avail_out, value, trans);
1528                                 phi_in[pos] = trans;
1529                         } else {
1530                                 /* value available */
1531                                 phi_in[pos] = pred_info->avail;
1532                         }
1533                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1534                 }
1535
1536                 /* We do not connect tuples as they will be connected automatically
1537                    by the corresponding projections. */
1538                 if (get_irn_mode(expr) != mode_T) {
1539
1540                         phi = new_r_Phi(block, arity, phi_in, mode);
1541                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1542
1543                         /* this value is now available through the new phi */
1544                         ir_valueset_replace(info->avail_out, value, phi);
1545                         ir_valueset_insert(info->new_set, value, phi);
1546                 }
1547                 free(phi_in);
1548
1549 #if 0
1550                 /* remove from antic_in, because expr is not anticipated
1551                    anymore in this block */
1552                 ir_valueset_remove_iterator(info->antic_in, &iter);
1553 #endif
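                /* mark the value as handled so that further insert iterations
                   skip it (see the antic_done check at the top of this loop) */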
1554                 ir_valueset_insert(info->antic_done, value, expr);
1555
1556                 env->changes |= 1;
1557         }
1558
1559 #if BETTER_GREED
1560         if (env->changes) {
1561                 /* iterate in inverse topological order */
1562                 plist_element_t *it;
1563                 foreach_plist(stack, it) {
1564                         ir_node *irn = (ir_node *)plist_element_get_value(it);
1565                         int j;
1566                         char redundant = 1;
1567                         ir_node *block = get_nodes_block(irn);
1568
1569                         /* does irn only have redundant successors? */
1570
1571                         if (! get_irn_outs_computed(irn))
1572                                 continue;
1573
1574                         for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
1575                                 ir_node *succ = get_irn_out(irn, j);
1576
1577                                 /* if succ and irn are in the same block */
1578                                 if (get_nodes_block(succ) == block && is_redundant(succ)) {
1579                                         continue;
1580                                 } else {
1581                                         redundant = 0;
1582                                         break;
1583                                 }
1584                         }
1585
1586                         if (redundant)
1587                                 flag_redundant(irn, 1);
1588                 }
1589         }
1590         plist_free(stack);
1591 #endif
1592
1593 }
1594
1595 #if HOIST_HIGH
1596 /**
1597  * Dom tree block walker that hoists the nodes made available by
1598  * insert_nodes() into the highest possible block in which they are still
1599  * anticipated and their operands are available, avoiding deeper loop nests.
1600  */
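/* Illustrative sketch (hypothetical source view): if insert_nodes() placed
 * a + b in a block inside a loop, hoist_high() may move it further up to a
 * dominating block with a smaller loop depth, as long as a + b stays
 * anticipated there and the operands' blocks dominate the new position.
 */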
1601 static void hoist_high(ir_node *block, void *ctx)
1602 {
1603         pre_env                *env        = (pre_env*)ctx;
1604         block_info             *curr_info;
1605         ir_valueset_iterator_t  iter;
1606         ir_node                *expr;
1607         ir_node                *value;
1608         int                     arity      = get_irn_arity(block);
1609
1610         if (! is_Block(block))
1611                 return;
1612
1613         if (arity < 2)
1614                 return;
1615
1616         if (block == env->start_block)
1617                 return;
1618
1619         curr_info = get_block_info(block);
1620
1621         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1622
1623         /* foreach entry optimized by insert_nodes */
1624         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1625                 int pos;
1626
1627                 if (is_memop(expr) || is_Proj(expr))
1628                         continue;
1629
1630                 for (pos = 0; pos < arity; ++pos) {
1631                         /* standard target is predecessor block */
1632                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1633                         block_info *pred_info  = get_block_info(target);
1634                         ir_node    *avail;
1635                         ir_node    *new_target;
1636                         ir_node    *last_target;
1637                         ir_node    *trans_expr;
1638                         ir_node    *trans_value;
1639                         ir_node    *dom;
1640                         int         avail_arity;
1641                         int         i;
1642                         unsigned    nest_depth;
1643
1644                         /* get phi translated value */
1645                         trans_expr  = get_translated(target, expr);
1646                         trans_value = identify(trans_expr);
1647                         avail = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1648
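                        /* if no leader is available in the predecessor, fall back
                           to the translated expression itself */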
1649                         if (avail == NULL)
1650                                 avail = trans_expr;
1651
1652                         value = identify(avail);
1653                         avail_arity = get_irn_arity(avail);
1654
1655                         /* anticipation border */
1656                         new_target  = NULL;
1657                         last_target = NULL;
1658                         nest_depth  = get_loop_depth(get_irn_loop(target));
1659                         dom         = target;
1660
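                        /* walk up the dominator tree as long as the expression stays
                           anticipated and clean, without entering a deeper loop nest */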
1661                         while (dom != environ->start_block) {
1662                                 dom = get_Block_idom(dom);
1663
1664                                 if (is_Bad(dom))
1665                                         break;
1666
1667                                 /* do not hoist into loops */
1668                                 if (get_loop_depth(get_irn_loop(dom)) > nest_depth)
1669                                         break;
1670
1671                                 if (get_loop_depth(get_irn_loop(dom)) < nest_depth)
1672                                         last_target = dom;
1673
1674                                 nest_depth = get_loop_depth(get_irn_loop(dom));
1675
1676                                 /* antic_in means the expression is clean and may be hoisted
1677                                    above this block, but must still be placed into it */
1678                                 new_target = dom;
1679                                 /* check whether the available node is still anticipated
1680                                    and clean (clean is part of antic) */
1681                                 if (! ir_valueset_lookup(get_block_info(dom)->antic_in, value))
1682                                         break;
1683                         }
1684
1685                         /* No new target or does the available node already dominate the new_target? */
1686                         if (new_target) {
1687                                 DB((dbg, LEVEL_2, "leader block %+F\n", get_nodes_block(avail)));
1688                                 /* already in the same block or in a dominating one? */
1689                                 if (block_strictly_dominates(new_target, get_nodes_block(avail)))
1690                                         new_target = NULL;
1691                         }
1692
1693                         DB((dbg, LEVEL_2, "dom border %+F\n", new_target));
1694
1695                         /* check for uses of the available node's operands on our path */
1696                         for (i = 0; i < avail_arity; i++) {
1697                                 ir_node *pred       = get_irn_n(avail, i);
1698                                 ir_node *pred_block = get_nodes_block(pred);
1699                                 int      j;
1700
1701                                 if (new_target == NULL)
1702                                         break;
1703
1704                                 if (! get_irn_outs_computed(pred)) {
1705                                         new_target = NULL;
1706                                         break;
1707                                 }
1708
1709                                 /* if the operand's block does not dominate the hoist target, lower the target to it */
1710                                 if (! block_strictly_dominates(pred_block, new_target)) {
1711                                         new_target = pred_block;
1712                                 }
1713
1714                                 /* walk over all users (outs) of the operand */
1715                                 for (j = get_irn_n_outs(pred) - 1; j >= 0; --j) {
1716                                         ir_node *succ = get_irn_out(pred, j);
1717
1718                                         /* is succ on our path? */
1720                                         if (block_strictly_dominates(get_nodes_block(succ), new_target)) {
1721                                                 ir_node *succ_value = identify(succ);
1722
1723                                                 /* pred not dead? */
1724                                                 if (succ_value != value) {
1725                                                         continue;
1726                                                 } else {
1727                                                         new_target = NULL;
1728                                                         break;
1729                                                 }
1730                                         }
1731                                 }
1732                         }
1733
1734                         /* only one usage on our path */
1735                         if (new_target) {
1736                                 /* push the available node up into the new target block */
1737                                 set_nodes_block(avail, new_target);
1738
1739                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1740                         }
1741                 }
1742         }
1743 }
1744 #endif
1745
1746 /* --------------------------------------------------------
1747  * Elimination of fully redundant nodes
1748  * --------------------------------------------------------
1749  */
1750
1751 /**
1752  * Walker which finds redundant nodes using avail_out sets
1753  * and exchanges them for existing ones.
1754  * We cannot change the graph here as this would affect
1755  * the hash values of the nodes.
1756  *
1757  * @param irn  the node
1758  * @param ctx  the walker environment
1759  */
1760 static void eliminate(ir_node *irn, void *ctx)
1761 {
1762         pre_env *env = (pre_env*)ctx;
1763
1764         if (! is_Block(irn)) {
1765                 ir_node    *block = get_nodes_block(irn);
1766                 block_info *info  = get_block_info(block);
1767                 ir_node    *value = identify(irn);
1768
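                /* identify() yields the representative of the node's value;
                   nodes without a value are skipped */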
1769                 if (value != NULL) {
1770                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1771
1772                         if (expr != NULL && expr != irn) {
1773                                 elim_pair *p = OALLOC(env->obst, elim_pair);
1774
1775                                 p->old_node = irn;
1776                                 p->new_node = expr;
1777                                 p->next     = env->pairs;
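                                /* nodes created during the insert phase have indices of at
                                   least last_idx, so such a replacement realizes a partial
                                   redundancy; older nodes mean full redundancy */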
1778                                 if (get_irn_idx(expr) >= env->last_idx)
1779                                         p->reason = FS_OPT_GVN_PARTLY;
1780                                 else
1781                                         p->reason = FS_OPT_GVN_FULLY;
1782                                 env->pairs = p;
1783                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1784                         }
1785                 }
1786         }
1787 }  /* eliminate */
1788
1789 /**
1790  * Do all the recorded changes and optimize
1791  * newly created Phi's.
1792  *
1793  * @param pairs  list of elimination pairs
1794  */
1795 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1796 {
1797         elim_pair             *p;
1798         ir_nodeset_iterator_t iter;
1799         ir_node               *m_phi;
1800         ir_node               *end = environ->end_node;
1801
1802         for (p = pairs; p != NULL; p = p->next) {
1803                 /* might be already changed */
1804                 p->new_node = skip_Id(p->new_node);
1805
1806                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1807
1808                 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1809                  * which we can optimize here */
1810                 if (is_Phi(p->new_node)) {
1811                         int      i;
1812                         ir_node *res = NULL;
1813
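                        /* res becomes the unique operand that differs from old_node,
                           or is reset to NULL if there is more than one such operand */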
1814                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1815                                 ir_node *pred = get_irn_n(p->new_node, i);
1816
1817                                 if (pred != p->old_node) {
1818                                         if (res) {
1819                                                 res = NULL;
1820                                                 break;
1821                                         }
1822                                         res = pred;
1823                                 }
1824                         }
1825                         if (res) {
1826                                 exchange(p->new_node, res);
1827                                 p->new_node = res;
1828                         }
1829                 }
1830                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1831
1832                 exchange(p->old_node, p->new_node);
1833         }
1834
1835         /* remove keep alive edges of unused mode_M phis */
1836         foreach_ir_nodeset(keeps, m_phi, iter) {
1837                 remove_End_keepalive(end, m_phi);
1838         }
1839 }  /* eliminate_nodes */
1840
1841 /* --------------------------------------------------------
1842  * GVN PRE pass
1843  * --------------------------------------------------------
1844  */
1845
1846 /**
1847  * Gvn_Pre algorithm.
1848  *
1849  * @param irg   the graph
1850  */
1851 static void gvn_pre(ir_graph *irg, pre_env *env)
1852 {
1853         unsigned              antic_iter;
1854         unsigned              insert_iter;
1855
1856         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1857
1858         /* allocate block info */
1859         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1860
1861         ir_nodehashmap_init(&value_map);
1862
1863         /* generate exp_gen */
1864         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1865         dump_all_expgen_sets(env->list);
1866
1867         /* compute the avail_out sets for all blocks */
1868         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1869
1870         /* compute the anticipated value sets for all blocks */
1871         antic_iter      = 0;
1872         env->first_iter = 1;
1873
1874         /* antic_in passes */
1875         do {
1876                 ++antic_iter;
1877                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1878                 env->changes = 0;
1879                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1880                 env->first_iter = 0;
1881                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1882         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1883
1884         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1885
1886         ir_nodeset_init(env->keeps);
1887         insert_iter       = 0;
1888         env->insert_phase = 1;
1889         env->first_iter   = 1;
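        /* record the current node index limit before insertion; eliminate()
           uses it to tell newly created nodes from pre-existing ones */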
1890         env->last_idx     = get_irg_last_idx(irg);
1891         /* compute redundant expressions */
1892         do {
1893                 ++insert_iter;
1894                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1895                 env->changes = 0;
1896                 /* TODO topologically top down would be better; fewer iterations. */
1897                 dom_tree_walk_irg(irg, insert_nodes, NULL, env);
1898                 env->first_iter = 0;
1899                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1900         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1901         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1902
1903 #if HOIST_HIGH
1904         dom_tree_walk_irg(irg, hoist_high, NULL, env);
1905 #endif
1906
1907         /* last step: eliminate nodes */
1908         irg_walk_graph(irg, NULL, eliminate, env);
1909         eliminate_nodes(env->pairs, env->keeps);
1910
1911         ir_nodeset_destroy(env->keeps);
1912 }
1913
1914 /**
1915  * Gvn_Pre pass for graph irg.
1916  *
1917  * @param irg   the graph
1918  */
1919 void do_gvn_pre(ir_graph *irg)
1920 {
1921         struct obstack        obst;
1922         pre_env               env;
1923         ir_nodeset_t          keeps;
1924         optimization_state_t  state;
1925         block_info           *block_info;
1926
1927         /* Bads and unreachable code cause too much trouble with dominance,
1928            so both are removed.  We also need consistent dominance, outs and
1929            loop info (the latter for endless loop detection), and the absence
1930            of critical edges, which is a GVN-PRE precondition.
1931          */
1932         assure_irg_properties(irg,
1933                 IR_GRAPH_PROPERTY_NO_BADS
1934                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
1935                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
1936                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1937                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1938                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
1939
1940         /* register a debug mask */
1941         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1942
1943         save_optimization_state(&state);
1944         environ = &env;
1945
1946         /* Edge information would crash if enabled, due to our trick of allocating on another obstack. */
1947         edges_deactivate(irg);
1948         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1949
1950         DEBUG_ONLY(init_stats();)
1951
1952         /* setup environment */
1953         obstack_init(&obst);
1954         env.graph        = irg;
1955         env.obst         = &obst;
1956         env.list         = NULL;
1957         env.start_block  = get_irg_start_block(irg);
1958         env.end_block    = get_irg_end_block(irg);
1959         env.end_node     = get_irg_end(irg);
1960         env.pairs        = NULL;
1961         env.keeps        = &keeps;
1962         env.insert_phase = 0;
1963
1964         /* Detect and set links of infinite loops to non-zero. */
1965         analyse_loops(irg);
1966
1967         /* Switch on GCSE. We need it to correctly compute
1968            the value of a node, which is independent of
1969            its block. */
1970         set_opt_global_cse(1);
1971         /* replace the value table: like new_identities(), but with our own compare function */
1972         if (irg->value_table != NULL)
1973                 del_pset(irg->value_table);
1974         /* start with an expected capacity of 512 nodes in the pset */
1975         irg->value_table = new_pset(compare_gvn_identities, 512);
1976
1977         /* do GVN-PRE pass */
1978         gvn_pre(irg, &env);
1979         DEBUG_ONLY(print_stats();)
1980
1981         /* clean up: delete all sets */
1982         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
1983                 free_block_info(block_info);
1984         }
1985
1986         DEBUG_ONLY(free_stats();)
1987         ir_nodehashmap_destroy(&value_map);
1988         obstack_free(&obst, NULL);
1989         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1990
1991         /* Pin the graph again.
1992            This is needed due to the use of set_opt_global_cse(1) */
1993         set_irg_pinned(irg, op_pin_state_pinned);
1994         restore_optimization_state(&state);
1995         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
1996
1997         /* TODO there are optimizations that try to use the existing value_table */
1998         new_identities(irg);
1999 }
2000
2001 /* Creates an ir_graph pass for do_gvn_pre. */
2002 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2003 {
2004         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
2005 }