becopyheur4: Clean up co_mst_irn_init().
[libfirm] / ir / opt / gvn_pre.c
1 /*
2  * This file is part of libFirm.
3  * Copyright (C) 2012 University of Karlsruhe.
4  */
5
6 /**
7  * @file
8  * @brief   Global Value Numbering Partial Redundancy Elimination
9  *          (VanDrunen Hosking 2004)
10  * @author  Michael Beck
11  * @brief
12  */
13 #include "config.h"
14
15 #include "debug.h"
16 #include "ircons.h"
17 #include "irdom.h"
18 #include "iredges.h"
19 #include "irflag.h"
20 #include "irgmod.h"
21 #include "irgopt.h"
22 #include "irgwalk.h"
23 #include "irnodehashmap.h"
24 #include "irnodeset.h"
25 #include "iropt_dbg.h"
26 #include "iroptimize.h"
27 #include "irouts.h"
28 #include "irpass.h"
29 #include "valueset.h"
30 #include "irloop.h"
31 #include "firmstat_t.h"
32
33 #include "irgraph_t.h"
34 #include "irnode_t.h"
35 #include "iropt_t.h"
36 #include "plist.h"
37
38 /* suggested by GVN-PRE authors */
39 #define MAX_ANTIC_ITER 10
40 #define MAX_INSERT_ITER 3
41
42 /* Stops antic iteration from processing endless loops. */
43 #define IGNORE_INF_LOOPS 1
44 /* Stops antic information to flow over infinite loop backedge */
45 #define NO_INF_LOOPS 0
46
47 /* Attempt to reduce register pressure and reduce code size
48    for hoisted nodes. */
49 #define HOIST_HIGH 1
50 #define COMMON_DOM 1
51
52 /* Seamless implementation of handling loads and generally memory
53    dependent nodes with GVN-PRE. */
54 #define LOADS 1
55 #define DIVMODS 0
56
57 /* Experimental */
58
59 /* Only optimize node directly after phi node if node is only successor. */
60 #define MIN_CUT 0
61
62 /* GVN is very limited. This enables optimize_node during value recognition.
63    GVN ist still very limited, since a+1+b+1 doesn't equal a+b+2.
64    TODO Broken for yet unknown reasons. */
65 #define OPTIMIZE_NODES 0
66
67
68 /** Additional info we need for every block. */
69 typedef struct block_info {
70         ir_valueset_t     *exp_gen;    /* contains this blocks clean expressions */
71         ir_valueset_t     *avail_out;  /* available values at block end */
72         ir_valueset_t     *antic_in;   /* clean anticipated values at block entry */
73         ir_valueset_t     *antic_done; /* keeps elements of antic_in after insert nodes phase */
74         ir_valueset_t     *new_set;    /* new by hoisting made available values */
75         ir_nodehashmap_t  *trans;      /* contains translated nodes translated into block */
76         ir_node           *avail;      /* saves available node for insert node phase */
77         int                found;      /* saves kind of availability for insert_node phase */
78         ir_node           *block;      /* block of the block_info */
79         struct block_info *next;       /* links all instances for easy access */
80 } block_info;
81
82 /**
83  * A pair of nodes to be exchanged.
84  * We have to defer the exchange because there are still needed references
85  * to certain nodes.
86  */
87 typedef struct elim_pair {
88         ir_node *old_node;      /* node that will be replaced */
89         ir_node *new_node;      /* replacement for old_node */
90         struct elim_pair *next; /* links all instances for easy access */
91         int     reason;         /* reason for the replacement */
92 } elim_pair;
93
94 /** environment for the GVN-PRE algorithm */
95 typedef struct pre_env {
96         ir_graph       *graph;        /* current graph */
97         struct obstack  obst;         /* obstack to allocate on */
98         ir_node        *start_block;  /* start block of the current graph */
99         ir_node        *end_block;    /* end block of the current graph */
100         ir_node        *end_node;     /* end node of the current graph */
101         block_info     *list;         /* block_info list head */
102         elim_pair      *pairs;        /* elim_pair list head */
103         ir_nodeset_t   *keeps;        /* a list of to be removed phis to kill their keep alive edges */
104         unsigned        last_idx;     /* last node index of input graph */
105         char            changes;      /* flag for fixed point iterations - non-zero if changes occurred */
106         char            first_iter;   /* non-zero for first fixed point iteration */
107         int             iteration;    /* iteration counter */
108 #if OPTIMIZE_NODES
109         pset           *value_table;   /* standard value table*/
110         pset           *gvnpre_values; /* gvnpre value table */
111 #endif
112 } pre_env;
113
114 static pre_env *environ;
115
116 /* custom GVN value map */
117 static ir_nodehashmap_t value_map;
118
119 /* debug module handle */
120 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
121
122 #ifdef DEBUG_libfirm
123
124 /* --------------------------------------------------------
125  * Statistics
126  * --------------------------------------------------------
127  */
128
129 typedef struct gvnpre_statistics {
130         int replaced;
131         int partially;
132         int fully;
133         int loads;
134         int divmods;
135         int hoist_high;
136         int first_iter_found;
137         int antic_iterations;
138         int insert_iterations;
139         int infinite_loops;
140 } gvnpre_statistics;
141
142 static gvnpre_statistics *gvnpre_stats = NULL;
143
144 static void init_stats(void)
145 {
146         gvnpre_stats = XMALLOCZ(gvnpre_statistics);
147 }
148
149 static void free_stats(void)
150 {
151         free(gvnpre_stats);
152         gvnpre_stats = NULL;
153 }
154
155 static void print_stats(void)
156 {
157         gvnpre_statistics *stats = gvnpre_stats;
158         DB((dbg, LEVEL_1, "replaced             : %d\n", stats->replaced));
159         DB((dbg, LEVEL_1, "antic_in iterations  : %d\n", stats->antic_iterations));
160         DB((dbg, LEVEL_1, "insert iterations    : %d\n", stats->insert_iterations));
161         DB((dbg, LEVEL_1, "infinite loops       : %d\n", stats->infinite_loops));
162         DB((dbg, LEVEL_1, "fully redundant      : %d\n", stats->fully));
163         DB((dbg, LEVEL_1, "partially redundant  : %d\n", stats->partially));
164         DB((dbg, LEVEL_1, "  loads                : %d\n", stats->loads));
165         DB((dbg, LEVEL_1, "  Divs/Mods            : %d\n", stats->divmods));
166         DB((dbg, LEVEL_1, "  hoist high           : %d\n", stats->hoist_high));
167         DB((dbg, LEVEL_1, "  first iteration      : %d\n", stats->first_iter_found));
168 }
169
170 #define set_stats(var, value) (var)=(value)
171 #define inc_stats(var)        ((var)+=1)
172
173 /* --------------------------------------------------------
174  * Dump valuesets
175  * --------------------------------------------------------
176  */
177
178 /**
179  * Dump a value set.
180  *
181  * @param set    the set to dump
182  * @param txt    a text to describe the set
183  * @param block  the owner block of the set
184  */
185 static void dump_value_set(ir_valueset_t *set, const char *txt, ir_node *block)
186 {
187         ir_valueset_iterator_t iter;
188         ir_node *value, *expr;
189         int i;
190
191         DB((dbg, LEVEL_2, "%s(%+F) = {\n", txt, block));
192         i = 0;
193         foreach_valueset(set, value, expr, iter) {
194                 if ((i & 3) == 3)
195                         DB((dbg, LEVEL_2, "\n"));
196                 if (value != expr)
197                         DB((dbg, LEVEL_2, " %+F(%+F),", expr, value));
198                 else
199                         DB((dbg, LEVEL_2, " %+F,", expr));
200                 ++i;
201         }
202         DB((dbg, LEVEL_2, "\n}\n"));
203 }
204
205 /**
206  * Dump all exp_gen value sets.
207  *
208  * @param list  the list of block infos to retrieve the sets from
209  */
210 static void dump_all_expgen_sets(block_info *list)
211 {
212         block_info *block_info;
213
214         for (block_info = list; block_info != NULL; block_info = block_info->next) {
215                 dump_value_set(block_info->exp_gen, "[Exp_gen]", block_info->block);
216         }
217 }
218
219 #else
220
221 #define dump_all_expgen_sets(list)
222 #define dump_value_set(set, txt, block)
223
224 #endif /* DEBUG_libfirm */
225
226 /* --------------------------------------------------------
227  * GVN Functions
228  * --------------------------------------------------------
229  */
230
231 /**
232  * Compares node collisions in valuetable.
233  * Modified identities_cmp().
234  */
235 static int compare_gvn_identities(const void *elt, const void *key)
236 {
237         ir_node *a = (ir_node *)elt;
238         ir_node *b = (ir_node *)key;
239         int i, irn_arity_a;
240
241         if (a == b) return 0;
242
243         /* phi nodes kill predecessor values and are always different */
244         if (is_Phi(a) || is_Phi(b))
245                 return 1;
246
247         /* memops are not the same, even if we want to optimize them
248            we have to take the order in account */
249         if (is_memop(a) || is_memop(b) ||
250             get_irn_mode(a) == mode_T ||
251             get_irn_mode(b) == mode_T) {
252                 /* Loads with the same predecessors are the same value;
253                    this should only happen after phi translation. */
254                 if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
255                         return 1;
256         }
257
258         if ((get_irn_op(a) != get_irn_op(b)) ||
259             (get_irn_mode(a) != get_irn_mode(b))) return 1;
260
261         /* compare if a's in and b's in are of equal length */
262         irn_arity_a = get_irn_arity(a);
263         if (irn_arity_a != get_irn_arity(b))
264                 return 1;
265
266         /* blocks are never the same */
267         if (is_Block(a) || is_Block(b))
268                 return 1;
269
270         /* should only be used with gcse enabled */
271         assert(get_opt_global_cse());
272
273         /* compare a->in[0..ins] with b->in[0..ins] */
274         for (i = 0; i < irn_arity_a; ++i) {
275                 ir_node *pred_a = get_irn_n(a, i);
276                 ir_node *pred_b = get_irn_n(b, i);
277                 if (pred_a != pred_b) {
278                         if (!is_irn_cse_neutral(pred_a) || !is_irn_cse_neutral(pred_b))
279                                 return 1;
280                 }
281         }
282
283         /*
284          * here, we already now that the nodes are identical except their
285          * attributes
286          */
287         if (a->op->ops.node_cmp_attr)
288                 return a->op->ops.node_cmp_attr(a, b);
289
290         return 0;
291 }
292
293 /**
294  * Identify does a lookup in the GVN valuetable.
295  * To be used when no new GVN values are to be created.
296  *
297  * @param e  a node representing an expression
298  * @return a node representing the value
299  */
300 static ir_node *identify(ir_node *irn)
301 {
302         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
303         if (value)
304                 return value;
305         /* irn represents a new value, so return the leader */
306         return identify_remember(irn);
307 }
308
309 /**
310  * remember() adds node irn to the GVN valuetable.
311  * Identify_remember only identifies values of nodes with the
312  * same predecessor nodes (not values). By creating a node from the predecessor
313  * values/leaders, a true valuetree is built. Phis kill their predecessor value,
314  * so no circular dependencies need to be resolved.
315  *
316  * TODO Improvement:
317  *      Maybe this could be implemented with a custom node hash that takes
318  *      phi nodes and true values (instead of predecessors) into account,
319  *      resulting in value numbers.
320  * TODO This unnecessarily also handles nodes like calls, which are never equal.
321  *
322  * @param irn  a node representing an expression
323  * @return     the value of the expression
324  */
325 static ir_node *remember(ir_node *irn)
326 {
327         int       arity   = get_irn_arity(irn);
328         int       i;
329         int       changed = 0;
330         ir_node **in      = XMALLOCN(ir_node *, arity);
331         ir_node  *value;
332
333         for (i = 0; i < arity; ++i) {
334                 ir_node *pred       = get_irn_n(irn, i);
335                 /* value and leader at the same time */
336                 ir_node *pred_value = identify(pred);
337
338                 /* phi will be translated anyway, so kill the predecessor values
339                    this also prevents circular dependencies */
340                 if (is_Phi(pred)) {
341                         /* every phi represents its own value */
342                         in[i] = pred;
343                         continue;
344                 }
345
346                 /* predecessor is not its value representation/the leader */
347                 if (pred != pred_value)
348                         changed = 1;
349                 in[i] = pred_value;
350         }
351
352         if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
353                 /* create representative for */
354                 ir_node *nn = new_ir_node(
355                         get_irn_dbg_info(irn),
356                         get_irn_irg(irn),
357                         get_nodes_block(irn),
358                         get_irn_op(irn),
359                         get_irn_mode(irn),
360                         get_irn_arity(irn),
361                         in);
362                 copy_node_attr(environ->graph, irn, nn);
363
364 #if OPTIMIZE_NODES
365                 /* TODO optimize_node() uses the valuetable and thus the custom
366                    identify_cmp() and will fail trying. */
367                 environ->graph->value_table = environ->value_table;
368                 nn = optimize_node(nn);
369                 environ->graph->value_table = environ->gvnpre_values;
370 #endif
371
372                 /* now the value can be determined because the
373                    predecessors are the leaders */
374                 value = identify_remember(nn);
375         } else {
376                 value = identify_remember(irn);
377         }
378         free(in);
379
380         DB((dbg, LEVEL_4, "Remember %+F as value %+F\n", irn, value));
381         ir_nodehashmap_insert(&value_map, irn, value);
382
383         return value;
384 }
385
386 /**
387  * When the value map has been built we may lookup expressions
388  * and remember them if new.
389  */
390 static ir_node *identify_or_remember(ir_node *irn)
391 {
392         ir_node *value = ir_nodehashmap_get(ir_node, &value_map, irn);
393         if (value)
394                 return value;
395         else
396                 return remember(irn);
397 }
398
399 /* --------------------------------------------------------
400  * Block info
401  * --------------------------------------------------------
402  */
403
404 /**
405  * Allocate block info for block block.
406  *
407  * @param block   the block
408  * @param env     the environment
409  */
410 static void alloc_block_info(ir_node *block, pre_env *env)
411 {
412         block_info *info = OALLOC(&env->obst, block_info);
413
414         set_irn_link(block, info);
415         info->exp_gen    = ir_valueset_new(16);
416         info->avail_out  = ir_valueset_new(16);
417         info->antic_in   = ir_valueset_new(16);
418         info->antic_done = ir_valueset_new(16);
419         info->trans = XMALLOC(ir_nodehashmap_t);
420         ir_nodehashmap_init(info->trans);
421
422         info->new_set = NULL;
423         info->avail   = NULL;
424         info->block   = block;
425         info->found   = 1;
426
427         info->next = env->list;
428         env->list  = info;
429 }
430
431 static void free_block_info(block_info *block_info)
432 {
433         ir_valueset_del(block_info->exp_gen);
434         ir_valueset_del(block_info->avail_out);
435         ir_valueset_del(block_info->antic_in);
436         if (block_info->trans) {
437                 ir_nodehashmap_destroy(block_info->trans);
438                 free(block_info->trans);
439         }
440         if (block_info->new_set)
441                 ir_valueset_del(block_info->new_set);
442 }
443
444 /**
445  * Bottom up walker that ensures that every block gets a block info.
446  *
447  * @param irn   the node
448  * @param ctx   the environment
449  */
450 static void block_info_walker(ir_node *irn, void *ctx)
451 {
452         if (is_Block(irn)) {
453                 pre_env *env = (pre_env*)ctx;
454                 alloc_block_info(irn, env);
455         }
456 }
457
458 /**
459  * Returns the block info of a block.
460  */
461 static block_info *get_block_info(ir_node *block)
462 {
463         return (block_info*)get_irn_link(block);
464 }
465
466 /* --------------------------------------------------------
467  * Infinite loop analysis
468  * --------------------------------------------------------
469  */
470
471 /**
472  * Walker to set block marks and loop links to 0.
473  */
474 static void clear_block_mark_loop_link(ir_node *block, void *env)
475 {
476         (void) env;
477
478         if (is_Block(block)) {
479                 set_Block_mark(block, 0);
480                 set_loop_link(get_irn_loop(block), NULL);
481         }
482 }
483
484 /**
485  * Returns non-zero if block is part of real loop loop.
486  */
487
488 static unsigned in_loop(ir_node *block, ir_loop *loop)
489 {
490         ir_loop *l     = get_irn_loop(block);
491         ir_loop *outer = get_irg_loop(environ->graph);
492
493         while (l != loop) {
494                 /* loop tree root is not a loop */
495                 if (l == NULL || l == outer)
496                         return 0;
497                 l = get_loop_outer_loop(l);
498         }
499         return 1;
500 }
501
502 /**
503  * Returns the outermost real loop of loop.
504  */
505 static ir_loop *get_loop_outermost(ir_loop *loop)
506 {
507         ir_loop *outer = get_irg_loop(environ->graph);
508         ir_loop *l     = loop;
509         ir_loop *last  = NULL;
510
511         while(l != outer) {
512                 last = l;
513                 l = get_loop_outer_loop(l);
514         }
515         return last;
516 }
517
518 /**
519  * Topologic bottom-up walker sets links of infinite loops to non-zero.
520  * Block marks are used to flag blocks reachable (from end) on the one hand,
521  * on the other hand they are set if the block is not part of an infinite loop.
522  */
523 static void infinite_loop_walker(ir_node *block, void *env)
524 {
525         int arity;
526         int i;
527         (void) env;
528
529         if (! is_Block(block))
530                 return;
531
532         /* start block has no predecessors */
533         if (block == environ->start_block)
534                 return;
535
536         arity = get_irn_arity(block);
537
538         /* block not part of a real loop: no infinite loop */
539         if (get_irn_loop(block) == get_irg_loop(environ->graph))
540                 set_Block_mark(block, 1);
541
542         if (get_Block_mark(block)) {
543                 /* reachable block: mark all cf predecessors */
544                 for (i = 0; i < arity; ++i) {
545                         ir_node *pred = get_Block_cfgpred_block(block, i);
546                         if (is_Bad(pred))
547                                 continue;
548                         set_Block_mark(pred, 1);
549                 }
550         } else {
551                 /* We are in a real loop and see an unreachable block. */
552                 ir_loop *outermost_loop = get_loop_outermost(get_irn_loop(block));
553
554                 /* flag loop as infinite */
555                 set_loop_link(outermost_loop, outermost_loop);
556                 DEBUG_ONLY(inc_stats(gvnpre_stats->infinite_loops);)
557
558                 /* The cf predecessors are unreachable, but can never be part of
559                    an infinite loop, because we just reached them. So we set the
560                    blockmark to prevent triggering the infinite loop detection. */
561
562                 /* passing information to the cf predecessors */
563                 for (i = 0; i < arity; ++i) {
564                         ir_node *pred = get_Block_cfgpred_block(block, i);
565
566                         if (is_Bad(pred))
567                                 continue;
568
569                         /* If our cf predecessor is in the same endless loop,
570                            it is also unreachable. */
571                         if (in_loop(pred, outermost_loop)) {
572                                 set_Block_mark(pred, 0);
573                         } else {
574                                 /* When we leave the unreachable loop, we artificially
575                                    declare the cf predecessor reachable. */
576                                 set_Block_mark(pred, 1);
577                         }
578                 }
579         }
580 }
581
582 /**
583  * Sets loop links of outermost infinite loops to non-zero.
584  */
585 static void analyse_loops(ir_graph *irg)
586 {
587         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK);
588
589         /* reset block mark and loop links */
590         irg_walk_blkwise_graph(irg, clear_block_mark_loop_link, NULL, NULL);
591
592         /* mark end block reachable */
593         set_Block_mark(get_irg_end_block(irg), 1);
594         irg_walk_blkwise_graph(irg, infinite_loop_walker, NULL, NULL);
595
596         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
597 }
598
599 #if IGNORE_INF_LOOPS || NO_INF_LOOPS
600 /**
601  * Returns non-zero if block is part of an infinite loop.
602  */
603 static unsigned is_in_infinite_loop(ir_node *block)
604 {
605         ir_loop *loop;
606
607         assert(is_Block(block));
608         loop = get_irn_loop(block);
609         assert(loop);
610
611         loop = get_loop_outermost(loop);
612         if (loop)
613                 return (get_loop_link(loop) != NULL);
614         else
615                 return 0;
616 }
617 #endif
618
619 /* --------------------------------------------------------
620  * GVN-PRE Exp_gen
621  * --------------------------------------------------------
622  */
623
624 /**
625  * Returns non-zero if a node is movable and a possible candidate for PRE.
626  */
627 static unsigned is_nice_value(ir_node *n)
628 {
629         ir_mode *mode = get_irn_mode(n);
630
631         if (is_Phi(n))
632                 return 1;
633
634 #if LOADS || DIVMODS
635         if (is_Proj(n) && mode != mode_X && mode != mode_T)
636                 return 1;
637 #else
638         if (is_Proj(n))
639                 return 0;
640 #endif
641
642 #if LOADS
643         if (is_Load(n))
644                 return get_Load_volatility(n) == volatility_non_volatile;
645         if (is_Store(n))
646                 return get_Store_volatility(n) == volatility_non_volatile;
647 #endif
648
649         if (get_irn_pinned(n) == op_pin_state_pinned)
650                 return 0;
651
652         if (! mode_is_data(mode)) {
653                 if (! is_Div(n) && ! is_Mod(n))
654                         return 0;
655         }
656         return 1;
657 }
658
659 /**
660  * Checks if a node n is clean in block block for exp_gen.
661  *
662  * @param n      the node
663  * @param block  the block
664  * @return non-zero value for clean node
665  */
666 static unsigned is_clean_in_block(ir_node *n, ir_node *block, ir_valueset_t *valueset)
667 {
668         int         i, arity;
669
670         if (is_Phi(n))
671                 return 1;
672
673         if (! is_nice_value(n))
674                 return 0;
675
676 #if LOADS
677         /* filter loads with no phi predecessor from antic_in */
678         if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
679                 return 0;
680         if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
681                 return 0;
682 #endif
683
684 #if DIVMODS
685         if (is_Div(n)) {
686                 ir_node *mem = get_Div_mem(n);
687
688                 mem = skip_Pin(mem);
689
690                 if (! is_Phi(mem) && ! is_NoMem(mem))
691                         return 0;
692         }
693
694         if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
695                 return 0;
696 #endif
697
698         arity = get_irn_arity(n);
699         for (i = 0; i < arity; ++i) {
700                 ir_node *pred   = get_irn_n(n, i);
701                 ir_node *value;
702
703                 if (is_Phi(pred))
704                         continue;
705
706                 /* we only handle current block */
707                 if (get_nodes_block(pred) != block)
708                         continue;
709
710                 if (! is_nice_value(pred))
711                         return 0;
712
713                 value = identify(pred);
714                 if (! ir_valueset_lookup(valueset, value))
715                         return 0;
716
717         }
718         return 1;
719 }
720
721 /**
722  * Topological walker puts nodes in top-down topological order into exp_gen set.
723  * Assumed to walk blockwise and nodewise topologically top-down.
724  *
725  * @param irn    the node
726  * @param ctx    the environment
727  */
728 static void topo_walker(ir_node *irn, void *ctx)
729 {
730         ir_node    *block;
731         block_info *info;
732         ir_node    *value;
733         (void) ctx;
734
735         if (is_Block(irn))
736                 return;
737
738 #if OPTIMIZE_NODES
739         environ->graph->value_table = environ->value_table;
740         identify_remember(irn);
741         environ->graph->value_table = environ->gvnpre_values;
742 #endif
743
744         /* GVN step: remember the value. */
745         value = remember(irn);
746
747         if (is_irn_constlike(irn))
748                 return;
749
750         block = get_nodes_block(irn);
751         info  = get_block_info(block);
752
753         if (get_irn_mode(irn) != mode_X)
754                 ir_valueset_insert(info->avail_out, value, irn);
755
756         /* values that are not in antic_in also dont't need to be in any other set */
757
758         if (! is_nice_value(irn))
759                 return;
760
761         if (is_clean_in_block(irn, block, info->exp_gen)) {
762                 DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
763
764                 ir_valueset_insert(info->exp_gen, value, irn);
765         }
766 }
767
768 /* --------------------------------------------------------
769  * GVN-PRE Antic_in
770  * --------------------------------------------------------
771  */
772
773 /**
774  * Gets result of nodes phi translation into block.
775  *
776  * @param node   the node
777  * @param block  the target block
778  *
779  * @return a phi translation of node node into block block or NULL
780  */
781 static ir_node *get_translated(ir_node *block, ir_node *node)
782 {
783         if (is_irn_constlike(node))
784                 return node;
785
786         return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
787 }
788
789 /**
790  * Saves result of phi translation of node into predecessor
791  * at pos of block succ.
792  *
793  * @param node   the node
794  * @param succ   the successor of the translation target block
795  * @param pos    the position of the predecessor block
796  * @param trans  the translation result
797  *
798  */
799 static void set_translated(ir_nodehashmap_t *map, ir_node *node, ir_node *trans)
800 {
801         if (is_irn_constlike(node))
802                 return;
803         /* insert or replace */
804         ir_nodehashmap_insert(map, node, trans);
805 }
806
807 /**
808  * Translates an expression above a Phi.
809  *
810  * @param node        the node
811  * @param block       the block the node is translated into
812  * @param pos         the input number of the destination block
813  *
814  * @return a node representing the translated value
815  */
816 static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
817 {
818         int       i;
819         int       arity;
820         ir_node **in;
821         ir_node  *pred_block = get_Block_cfgpred_block(block, pos);
822         ir_node  *nn;
823         unsigned  needed;
824
825         if (is_Phi(node)) {
826                 if (get_nodes_block(node) == block)
827                         return get_Phi_pred(node, pos);
828                 /* this phi does not need translation */
829                 return node;
830         }
831         arity = get_irn_arity(node);
832
833         needed = 0;
834         in = ALLOCANZ(ir_node *, arity);
835
836         /* A value has several representatives. The anti leader is chosen to be
837            the main representative. If we access a node as representative of a
838            value we always use the anti leader. The anti leader can be found by
839            antic_in(identify(node)). */
840         for (i = 0; i < arity; ++i) {
841                 ir_node *pred   = get_irn_n(node, i);
842                 ir_node *value  = identify(pred);
843                 /* get leader for pred to lookup its translated value */
844                 ir_node *leader = ir_valueset_lookup(leaderset, value);
845                 ir_node *pred_trans;
846                 ir_node *new_pred;
847
848                 if (! leader)
849                         leader = pred;
850
851                 /* we cannot find this value in antic_in, because the value
852                    has (possibly) changed! */
853                 pred_trans  = get_translated(pred_block, leader);
854
855
856 #if DIVMODS
857                 if (is_Div(node)) {
858                         ir_node *mem = get_Div_mem(node);
859
860                         mem = skip_Pin(mem);
861
862                         if (! is_Phi(mem))
863                                 pred_trans = get_Div_mem(node);
864                 }
865 #endif
866
867                 DB((dbg, LEVEL_3, "trans %+F of %+F is  %+F\n", leader, pred_block, pred_trans));
868                 if (pred_trans == NULL) {
869                         new_pred = pred;
870                 } else {
871                         new_pred = pred_trans;
872
873                         /* loads: Predecessor is a memory phi, which translated yields a proj or
874                            another phi. In case of projection and a load predecessor,
875                            skip them and use the loads memory. */
876                         if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
877 #if LOADS || DIVMODS
878                                 ir_node *loadstore = get_Proj_pred(pred_trans);
879                                 /* If we do not translate this node, we will get its value wrong. */
880                                 needed |= 1;
881
882                                 if (is_Load(loadstore)) {
883                                         /* Put new load under the adjacent loads memory edge
884                                            such that GVN may compare them. */
885                                         new_pred = get_Load_mem(loadstore);
886                                 } else if (is_Store(loadstore)) {
887                                         new_pred = get_Store_mem(loadstore);
888                                 }
889 #endif
890                         } else {
891                                 /* predecessor value changed, so translation is needed */
892                                 if (identify(new_pred) != identify(pred))
893                                         needed |= 1;
894                         }
895                 }
896
897                 DB((dbg, LEVEL_4, "in %+F\n", new_pred));
898                 in[i] = new_pred;
899         }
900
901         if (! needed)
902                 return node;
903
904         DB((dbg, LEVEL_3, "Translate\n"));
905
906         if (is_Proj(node))
907                 pred_block = get_nodes_block(in[0]);
908
909         /* copy node to represent the new value.
910            We do not translate nodes that do not need translation,
911            so we use the newly created nodes as value representatives only.
912            Their block is not important, because we create new ones during
913            the insert node phase. */
914         nn = new_ir_node(
915                 get_irn_dbg_info(node),
916                 environ->graph,
917                 pred_block,
918                 get_irn_op(node),
919                 get_irn_mode(node),
920                 arity,
921                 in);
922         /* We need the attribute copy here, because the Hash value of a
923            node might depend on it. */
924         copy_node_attr(environ->graph, node, nn);
925         /* Optimizing nn here is tempting but might be against the GVN-PRE algorithm
926            because it already uses availability. */
927
928         DB((dbg, LEVEL_3, "New node %+F in %+F origin %+F\n", nn, get_Block_cfgpred_block(block, pos), node));
929         return nn;
930 }
931
932 /**
933  * Block-walker, computes Antic_in(block).
934  * Builds a value tree out of the graph by translating values
935  * over phi nodes.
936  *
937  * @param block  the block
938  * @param ctx    the walker environment
939  */
940 static void compute_antic(ir_node *block, void *ctx)
941 {
942         pre_env                *env       = (pre_env*)ctx;
943         block_info             *succ_info;
944         block_info             *info;
945         ir_node                *succ;
946         ir_node                *value;
947         ir_node                *expr;
948         size_t                  size;
949         ir_valueset_iterator_t  iter;
950         int                     n_succ;
951
952         /* filter blocks from topological walker */
953         if (! is_Block(block))
954                 return;
955
956         /* the end block has no successor */
957         if (block == env->end_block)
958                 return;
959
960         info = get_block_info(block);
961         /* track changes */
962         size = ir_valueset_size(info->antic_in);
963         n_succ = get_Block_n_cfg_outs(block);
964
965         /* add exp_gen */
966         if (env->first_iter) {
967 #if IGNORE_INF_LOOPS
968                 /* keep antic_in of infinite loops empty */
969                 if (! is_in_infinite_loop(block)) {
970                         foreach_valueset(info->exp_gen, value, expr, iter) {
971                                 ir_valueset_insert(info->antic_in, value, expr);
972                         }
973                 }
974 #else
975                 foreach_valueset(info->exp_gen, value, expr, iter) {
976                         ir_valueset_insert(info->antic_in, value, expr);
977                 }
978 #endif
979         }
980
981         /* successor might have phi nodes */
982         if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
983                 succ      = get_Block_cfg_out(block, 0);
984                 int pos   = get_Block_cfgpred_pos(succ, block);
985                 succ_info = get_block_info(succ);
986
987                 /* initialize translated set */
988                 if (env->first_iter) {
989                         info->trans = XMALLOC(ir_nodehashmap_t);
990                         ir_nodehashmap_init(info->trans);
991                 }
992
993                 foreach_valueset(succ_info->antic_in, value, expr, iter) {
994                         ir_node *trans = get_translated(block, expr);
995                         ir_node *trans_value;
996                         ir_node *represent;
997
998                         if (trans == NULL)
999                                 trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
1000
1001                         /* create new value if necessary */
1002                         trans_value = identify_or_remember(trans);
1003
1004                         DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
1005
1006                         /* On value change (phi present) we need the translated node
1007                            to represent the new value for possible further translation. */
1008                         if (value != trans_value)
1009                                 represent = trans;
1010                         else
1011                                 represent = expr;
1012
1013                         if (is_clean_in_block(expr, block, info->antic_in)) {
1014 #if NO_INF_LOOPS
1015                                 /* Prevent information flow over the backedge of endless loops. */
1016                                 if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
1017                                         ir_valueset_replace(info->antic_in, trans_value, represent);
1018                                 }
1019 #else
1020                                 ir_valueset_replace(info->antic_in, trans_value, represent);
1021 #endif
1022                         }
1023                         set_translated(info->trans, expr, represent);
1024                 }
1025
1026         } else if (n_succ > 1) {
1027                 int         i;
1028                 ir_node    *common     = NULL;
1029                 ir_node    *succ0      = get_Block_cfg_out(block, 0);
1030                 block_info *succ0_info = get_block_info(succ0);
1031
1032                 /* disjoint of antic_ins */
1033                 foreach_valueset(succ0_info->antic_in, value, expr, iter) {
1034                         /* iterate over remaining successors */
1035                         for (i = 1; i < n_succ; ++i) {
1036                                 ir_node    *succ      = get_Block_cfg_out(block, i);
1037                                 block_info *succ_info = get_block_info(succ);
1038
1039                                 /* value in antic_in? */
1040                                 common = ir_valueset_lookup(succ_info->antic_in, value);
1041                                 if (common == NULL)
1042                                         break;
1043                         }
1044
1045                         if (common && is_clean_in_block(expr, block, info->antic_in))
1046                                 ir_valueset_replace(info->antic_in, value, expr);
1047                 }
1048         }
1049
1050
1051         DEBUG_ONLY(dump_value_set(info->antic_in, "Antic_in", block);)
1052
1053         if (size != ir_valueset_size(info->antic_in))
1054                 env->changes |= 1;
1055 }
1056
1057 /* --------------------------------------------------------
1058  * Main algorithm Avail_out
1059  * --------------------------------------------------------
1060  */
1061
1062 /**
1063  * Computes Avail_out(block):
1064  *
1065  * Avail_in(block)  = Avail_out(dom(block))
1066  * Avail_out(block) = Avail_in(block) \/ Nodes(block)
1067  *
1068  * Precondition:
1069  *  This function must be called in the top-down topological order:
1070  *  Then it computes Leader(Nodes(block)) instead of Nodes(block) !
1071  *
1072  * @param block   the block
1073  * @param ctx     walker context
1074  */
1075 static void compute_avail_top_down(ir_node *block, void *ctx)
1076 {
1077         pre_env    *env   = (pre_env*)ctx;
1078         block_info *info;
1079
1080         if (block == env->end_block)
1081                 return;
1082
1083         info  = get_block_info(block);
1084
1085         /* Add all nodes from the immediate dominator.
1086            This ensures that avail_out contains the leader. */
1087         if (block != env->start_block) {
1088                 ir_node                *dom_block = get_Block_idom(block);
1089                 block_info             *dom_info  = get_block_info(dom_block);
1090                 ir_node                *value;
1091                 ir_node                *expr;
1092                 ir_valueset_iterator_t  iter;
1093
1094                 foreach_valueset(dom_info->avail_out, value, expr, iter)
1095                         /* replace: use the leader from dominator, not local exp_gen */
1096                         ir_valueset_replace(info->avail_out, value, expr);
1097         }
1098
1099         DEBUG_ONLY(dump_value_set(info->avail_out, "Avail_out", block);)
1100 }
1101
1102 /* --------------------------------------------------------
1103  * Main algorithm redundancy detection
1104  * --------------------------------------------------------
1105  */
1106
1107 /**
1108  * Returns a valid mode if the value of expr is a partially redundant value.
1109  *
1110  * @param block   the block
1111  * @param expr    the expression
1112  *
1113  * @return mode of the expression if it is partially redundant else NULL
1114  */
1115 static ir_mode *is_partially_redundant(ir_node *block, ir_node *expr, ir_node *value)
1116 {
1117         ir_node *first_avail         = NULL;
1118         int      pos;
1119         int      arity               = get_irn_arity(block);
1120         int      fully_redundant     = 1;
1121         int      partially_redundant = 0;
1122         ir_mode *mode                = NULL;
1123
1124         DB((dbg, LEVEL_3, "is partially redundant %+F(%+F) of %+F\n", expr, value, block));
1125
1126         /* for each predecessor blocks */
1127         for (pos = 0; pos < arity; ++pos) {
1128                 ir_node    *pred_block  = get_Block_cfgpred_block(block, pos);
1129                 block_info *pred_info;
1130                 ir_node    *trans_expr;
1131                 ir_node    *trans_value;
1132                 ir_node    *avail_expr;
1133
1134                 pred_info  = get_block_info(pred_block);
1135                 trans_expr = get_translated(pred_block, expr);
1136                 trans_value = identify(trans_expr);
1137
1138                 if (is_Const(trans_expr))
1139                         avail_expr = trans_expr;
1140                 else
1141                         avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1142
1143                 /* value might be available through a not yet existing constant */
1144                 if (avail_expr == NULL && is_Const(trans_expr)) {
1145                         /* limit range of new constants */
1146                         ir_mode   *cmode = get_irn_mode(trans_expr);
1147                         ir_tarval *upper = new_tarval_from_long(127, cmode);
1148                         ir_tarval *lower = new_tarval_from_long(-127, cmode);
1149                         ir_tarval *c     = get_Const_tarval(trans_expr);
1150
1151                         /* tarval within range? */
1152                         if (tarval_cmp(lower, c) == ir_relation_less_equal &&
1153                                 tarval_cmp(c, upper) == ir_relation_less_equal) {
1154                                 avail_expr = trans_expr;
1155                         } else {
1156                                 avail_expr = NULL;
1157                         }
1158             }
1159
1160                 DB((dbg, LEVEL_3, "avail_expr %+F  trans_expr %+F\n", avail_expr, trans_expr));
1161
1162                 if (avail_expr == NULL) {
1163                         pred_info->avail = trans_expr;
1164                         pred_info->found = 0;
1165                         fully_redundant  = 0;
1166                 } else {
1167                         /* expr is available, use the leader */
1168                         pred_info->avail    = avail_expr;
1169                         pred_info->found    = 1;
1170                         mode                = get_irn_mode(avail_expr);
1171                         partially_redundant = 1;
1172
1173                         if (first_avail == NULL)
1174                                 first_avail = avail_expr;
1175                         else if (first_avail != avail_expr)
1176                                 /* Multiple different expressions are available,
1177                                    This is why we need no cut over avail_out sets. */
1178                                 fully_redundant = 0;
1179
1180                         DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
1181                 }
1182         }
1183
1184         /* If it is not the same value already existing along every predecessor
1185        and it is defined by some predecessor then it is partially redundant. */
1186         if (! partially_redundant || fully_redundant)
1187                 return NULL;
1188         return mode;
1189 }
1190
1191 /**
1192  * Updates the new_set of a block by adding the new_set of
1193  * the immediate dominating block.
1194  *
1195  * @param  the block
1196  */
1197 static void update_new_set(ir_node *block, ir_node *idom)
1198 {
1199         ir_node                *value;
1200         ir_node                *expr;
1201         ir_valueset_iterator_t  iter;
1202         block_info             *curr_info = get_block_info(block);
1203         block_info             *idom_info = get_block_info(idom);
1204         int                     updated   = 0;
1205
1206         DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
1207         foreach_valueset(idom_info->new_set, value, expr, iter) {
1208                 /* inherit new_set from immediate dominator */
1209                 ir_valueset_insert(curr_info->new_set, value, expr);
1210                 /* replace in avail_out */
1211                 updated |= ir_valueset_replace(curr_info->avail_out, value, expr);
1212         }
1213 #ifdef DEBUG_libfirm
1214         if (updated)
1215                 dump_value_set(curr_info->avail_out, "Updated [Avail_out]", block);
1216 #endif
1217 }
1218
1219 /**
1220  * Checks if hoisting irn is greedy.
1221  * Greedy hoisting means that there are non partially redundant nodes
1222  * hoisted. This happens if a partially redundant node has
1223  * non redundant predecessors.
1224  */
1225 static unsigned is_hoisting_greedy(ir_node *irn, ir_node *block)
1226 {
1227         int block_arity = get_irn_arity(block);
1228         int arity = get_irn_arity(irn);
1229         int pos, i;
1230         block_info *info = get_block_info(block);
1231
1232         /* As long as the predecessor values are available in all predecessor blocks,
1233            we can hoist this value. */
1234         for (pos = 0; pos < block_arity; ++pos) {
1235                 ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1236                 block_info *pred_info  = get_block_info(pred_block);
1237
1238                 for (i = 0; i < arity; ++i) {
1239                         ir_node *pred     = get_irn_n(irn, i);
1240                         ir_node *value;
1241                         ir_node *leader;
1242                         ir_node *trans;
1243                         ir_node *trans_val;
1244                         ir_node *avail;
1245
1246 #if MIN_CUT
1247                         /* Very conservative min cut. Phi might only have 1 user. */
1248                         if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
1249                                 return 1;
1250 #endif
1251
1252                         if (is_Phi(pred) && get_nodes_block(pred) == block)
1253                                 continue;
1254
1255                         DB((dbg, LEVEL_3, "pred %+F\n", pred));
1256                         value = identify(pred);
1257                         leader = ir_valueset_lookup(info->antic_in, value);
1258                         if (! leader)
1259                                 leader = pred;
1260                         DB((dbg, LEVEL_3, "lead %+F\n", leader));
1261                         trans   = get_translated(pred_block, leader);
1262                         if (! trans)
1263                                 trans = pred;
1264                         DB((dbg, LEVEL_3, "trans %+F\n", trans));
1265
1266                         trans_val = identify(trans);
1267                         DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1268
1269                         if (is_Const(trans_val) || is_SymConst(trans_val)) {
1270                                 /* existing constant */
1271                                 if (get_irn_idx(trans_val) < environ->last_idx) {
1272                                         continue;
1273                                 } else {
1274                                         /* limit range of new constants */
1275                                         ir_mode   *cmode = get_irn_mode(trans);
1276                                         ir_tarval *upper = new_tarval_from_long(128, cmode);
1277                                         ir_tarval *lower = new_tarval_from_long(-128, cmode);
1278                                         ir_tarval *c     = get_Const_tarval(trans);
1279
1280                                         /* tarval within range? */
1281                                         if (tarval_cmp(lower, c) == ir_relation_less &&
1282                                                 tarval_cmp(c, upper) == ir_relation_less) {
1283                                                 continue;
1284                                         } else {
1285                                                 return 1;
1286                                         }
1287                                 }
1288                         }
1289
1290                         /* */
1291                         if (is_irn_constlike(trans_val))
1292                                 continue;
1293
1294                         avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1295
1296                         DB((dbg, LEVEL_3, "avail %+F\n", avail));
1297                         if (! avail)
1298                                 return 1;
1299 #if MIN_CUT
1300                         /* only optimize if predecessors have been optimized */
1301                         if (ir_valueset_lookup(info->antic_done, value) == NULL)
1302                                 return 1;
1303 #endif
1304                 }
1305         }
1306         return 0;
1307 }
1308
1309 /**
1310  * Perform insertion of partially redundant values.
1311  * For every block node, do the following:
1312  * 1.  Propagate the NEW_SETS of the dominator into the current block.
1313  * If the block has multiple predecessors,
1314  *     2a. Iterate over the ANTIC expressions for the block to see if
1315  *         any of them are partially redundant.
1316  *     2b. If so, insert them into the necessary predecessors to make
1317  *         the expression fully redundant.
1318  *     2c. Insert a new Phi merging the values of the predecessors.
1319  *     2d. Insert the new Phi, and the new expressions, into the
1320  *         NEW_SETS set.
1321  *
1322  * @param block  the block
1323  * @param ctx    the walker environment
1324  */
1325 static void insert_nodes_walker(ir_node *block, void *ctx)
1326 {
1327         pre_env                *env    = (pre_env*)ctx;
1328         int                     arity  = get_irn_arity(block);
1329         ir_node                *value;
1330         ir_node                *expr;
1331         block_info             *info;
1332         ir_node                *idom;
1333         int                     pos;
1334         ir_valueset_iterator_t  iter;
1335
1336         /* only blocks */
1337         if (! is_Block(block))
1338                 return;
1339
1340         /* ensure that even the start block has a new_set */
1341         info = get_block_info(block);
1342         if (info->new_set)
1343                 ir_valueset_del(info->new_set);
1344         info->new_set = ir_valueset_new(16);
1345
1346         if (block == env->start_block)
1347                 return;
1348
1349         DB((dbg, LEVEL_2, "Insert operation of %+F\n", block));
1350
1351         idom = get_Block_idom(block);
1352         update_new_set(block, idom);
1353
1354         /* process only path joining blocks */
1355         if (arity < 2) {
1356                 return;
1357         }
1358
1359         /* This is the main reason antic_in is preverred over antic_out;
1360            we may iterate over every anticipated value first and not
1361            over the predecessor blocks. */
1362         foreach_valueset(info->antic_in, value, expr, iter) {
1363                 ir_mode  *mode;
1364                 ir_node  *phi;
1365                 ir_node **phi_in;
1366
1367                 /* already done? */
1368                 if (ir_valueset_lookup(info->antic_done, value))
1369                         continue;
1370
1371                 /* filter phi nodes from antic_in */
1372                 if (is_Phi(expr))
1373                         continue;
1374
1375                 DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
1376
1377                 /* A value computed in the dominator is totally redundant.
1378                    Hence we have nothing to insert. */
1379                 if (ir_valueset_lookup(get_block_info(idom)->avail_out, value)) {
1380                         DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
1381                         DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
1382
1383                         ir_valueset_insert(info->antic_done, value, expr);
1384                         continue;
1385                 }
1386
1387                 if (is_hoisting_greedy(expr, block)) {
1388                         DB((dbg, LEVEL_2, "greedy\n"));
1389                         continue;
1390                 }
1391
1392                 mode = is_partially_redundant(block, expr, value);
1393                 if (mode == NULL)
1394                         continue;
1395
1396 #if LOADS || DIVMODS
1397                 /* save old mode_M phis to remove keepalive edges later */
1398                 if (is_memop(expr)) {
1399                         ir_node *mem = get_memop_mem(expr);
1400                         if (is_Phi(mem) && get_nodes_block(mem) == get_nodes_block(expr)) {
1401                                 ir_nodeset_insert(env->keeps, mem);
1402                         }
1403                 }
1404 #endif
1405
1406 #ifdef DEBUG_libfirm
1407                 if (! is_Proj(expr)) {
1408                         if (env->first_iter)
1409                                 inc_stats(gvnpre_stats->first_iter_found);
1410                         inc_stats(gvnpre_stats->partially);
1411                 }
1412                 if (is_Load(expr) || is_Store(expr))
1413                         inc_stats(gvnpre_stats->loads);
1414                 else if (is_Div(expr) || is_Mod(expr))
1415                         inc_stats(gvnpre_stats->divmods);
1416 #endif
1417
1418                 phi_in = XMALLOCN(ir_node *, arity);
1419
1420                 /* for each predecessor block */
1421                 for (pos = 0; pos < arity; ++pos) {
1422                         ir_node    *pred_block = get_Block_cfgpred_block(block, pos);
1423                         block_info *pred_info;
1424
1425                         pred_info = get_block_info(pred_block);
1426
1427                         if (! pred_info->found) {
1428                                 int i;
1429                                 int node_arity = get_irn_arity(expr);
1430                                 ir_node **in = XMALLOCNZ(ir_node *, node_arity);
1431                                 ir_node *trans;
1432                                 ir_node *new_value, *new_value2;
1433                                 ir_node *target_block = pred_block;
1434
1435                                 for (i = 0; i < node_arity; ++i) {
1436                                         ir_node *pred     = get_irn_n(expr, i);
1437                                         ir_node *value    = identify(pred);
1438                                         ir_node *leader;
1439                                         ir_node *trans;
1440                                         ir_node *trans_val;
1441                                         ir_node *avail;
1442
1443                                         /* transform knowledge over the predecessor from
1444                                            anti-leader world into leader world. */
1445
1446                                         DB((dbg, LEVEL_3, "pred %+F\n", pred));
1447                                         value = identify(pred);
1448
1449                                         /* get leader for pred to lookup its translated value */
1450                                         leader = ir_valueset_lookup(info->antic_in, value);
1451                                         if (! leader)
1452                                                 leader = pred;
1453                                         DB((dbg, LEVEL_3, "lead %+F\n", leader));
1454
1455                                         trans   = get_translated(pred_block, leader);
1456                                         if (!trans)
1457                                                 trans = pred;
1458                                         DB((dbg, LEVEL_3, "trans %+F\n", trans));
1459
1460                                         /* in case of phi, we are done */
1461                                         if (is_Phi(pred) && get_nodes_block(pred) == block) {
1462                                                 in[i] = trans;
1463                                                 continue;
1464                                         }
1465
1466                                         trans_val = identify(trans);
1467                                         DB((dbg, LEVEL_3, "value %+F\n", trans_val));
1468
1469                                         /* constants are always available but not in avail set */
1470                                         if (is_irn_constlike(trans_val)) {
1471                                                 in[i] = trans;
1472                                                 continue;
1473                                         }
1474
1475                                         /* use the leader
1476                                            In case of loads we need to make sure the hoisted
1477                                            loads are found despite their unique value. */
1478                                         avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
1479                                         DB((dbg, LEVEL_3, "avail %+F\n", avail));
1480
1481                                         assert(avail && "predecessor has to be available");
1482                                         in[i] = avail;
1483                                 }
1484
1485                                 if (is_Proj(expr))
1486                                         target_block = get_nodes_block(in[0]);
1487
1488                                 /* Copy node to represent the new value.
1489                                    We use translated nodes as value representatives only.
1490                                    They have anti leaders as predecessors, not leaders!
1491                                    So we have to create a new node using leaders. */
1492                                 trans = new_ir_node(
1493                                         get_irn_dbg_info(expr),
1494                                         environ->graph,
1495                                         target_block,
1496                                         get_irn_op(expr),
1497                                         get_irn_mode(expr),
1498                                         get_irn_arity(expr),
1499                                         in);
1500                                 free(in);
1501                                 /* We need the attribute copy here, because the Hash value of a
1502                                    node might depend on it. */
1503                                 copy_node_attr(environ->graph, expr, trans);
1504
1505                                 /* value is now available in target block through trans
1506                                    insert (not replace) because it has not been available */
1507                                 new_value = identify_or_remember(trans);
1508                                 ir_valueset_insert(pred_info->avail_out, new_value, trans);
1509                                 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
1510
1511                                 new_value2 = identify(get_translated(pred_block, expr));
1512                                 ir_valueset_insert(pred_info->avail_out, new_value2, trans);
1513                                 DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
1514
1515                                 DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
1516
1517                                 phi_in[pos] = trans;
1518                         } else {
1519                                 /* value available */
1520                                 phi_in[pos] = pred_info->avail;
1521                         }
1522                         DB((dbg, LEVEL_3, "phi_in %+F\n", phi_in[pos]));
1523                 }
1524
1525                 /* We do not connect tuples as they will be connected automatically
1526                    by the corresponding projections. */
1527                 if (get_irn_mode(expr) != mode_T) {
1528
1529                         phi = new_r_Phi(block, arity, phi_in, mode);
1530                         DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
1531
1532                         /* This value is now available through the new phi.
1533                            insert || replace in avail_out */
1534                         ir_valueset_replace(info->avail_out, value, phi);
1535                         ir_valueset_insert(info->new_set, value, phi);
1536                 }
1537                 free(phi_in);
1538
1539                 /* already optimized this value in this block */
1540                 ir_valueset_insert(info->antic_done, value, expr);
1541                 env->changes |= 1;
1542         }
1543 }
1544
1545 #if HOIST_HIGH
1546 static void update_new_set_walker(ir_node *block, void *ctx)
1547 {
1548         pre_env *env = (pre_env*)ctx;
1549
1550         if (! is_Block(block))
1551                 return;
1552         if (block == env->start_block)
1553                 return;
1554
1555         update_new_set(block, get_Block_idom(block));
1556 }
1557
1558 /**
1559  * Domtree block walker to insert nodes with dying operands
1560  * into the highest possible block whilst still being anticipated.
1561  */
1562 static void hoist_high(ir_node *block, void *ctx)
1563 {
1564         (void)ctx;
1565
1566         block_info             *curr_info;
1567         ir_valueset_iterator_t  iter;
1568         ir_node                *expr;
1569         ir_node                *value;
1570         int                     arity      = get_irn_arity(block);
1571
1572         if (! is_Block(block))
1573                 return;
1574
1575         curr_info = get_block_info(block);
1576
1577         if (curr_info->new_set)
1578                 ir_valueset_del(curr_info->new_set);
1579         curr_info->new_set = ir_valueset_new(16);
1580
1581         if (arity < 2)
1582                 return;
1583
1584         DB((dbg, LEVEL_2, "High hoisting %+F\n", block));
1585
1586         /* foreach entry optimized by insert node phase */
1587         foreach_valueset(curr_info->antic_done, value, expr, iter) {
1588                 int pos;
1589
1590                 /* TODO currently we cannot handle load and their projections */
1591                 if (is_memop(expr) || is_Proj(expr))
1592                         continue;
1593
1594                 DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
1595
1596                 /* visit hoisted expressions */
1597                 for (pos = 0; pos < arity; ++pos) {
1598                         /* standard target is predecessor block */
1599                         ir_node    *target     = get_Block_cfgpred_block(block, pos);
1600                         block_info *pred_info  = get_block_info(target);
1601                         ir_node    *avail;
1602                         ir_node    *new_target;
1603                         ir_node    *trans_expr;
1604                         ir_node    *trans_value;
1605                         ir_node    *dom;
1606                         int         avail_arity;
1607                         int         i;
1608                         unsigned    nest_depth;
1609                         block_info *dom_info;
1610
1611                         /* get phi translated value */
1612                         trans_expr  = get_translated(target, expr);
1613                         trans_value = identify(trans_expr);
1614                         avail       = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
1615
1616                         /* get the used expr on this path */
1617
1618                         /* TODO when does this happen? */
1619                         if (avail == NULL)
1620                                 continue;
1621
1622                         avail_arity = get_irn_arity(avail);
1623                         value = identify(avail);
1624
1625                         /* anticipation border */
1626                         new_target  = NULL;
1627                         nest_depth  = get_loop_depth(get_irn_loop(target));
1628
1629                         /* Either push the hoisted nodes up their path,
1630                            or try to put them directly into their common dominator. */
1631 #if COMMON_DOM
1632                         /* By using block (instead of target) as initial block,
1633                            we only allow hoisting into a common block of
1634                            both predecessor blocks. */
1635                         dom         = block;
1636 #else
1637                         dom         = target;
1638 #endif
1639
1640                         while (dom && dom != get_Block_idom(block)) {
1641
1642                                 dom = get_Block_idom(dom);
1643                                 dom_info = get_block_info(dom);
1644                                 DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
1645
1646                                 /* TODO Being in antic_in means hoistable above block,
1647                                    but we need 'hoistable into block'.
1648                                    This could be achieved by a flag for each valueset pair,
1649                                    being set during antic computation. */
1650
1651                                 /* check if available node ist still anticipated and clean */
1652                                 if (! ir_valueset_lookup(dom_info->antic_in, value)) {
1653                                         DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
1654                                         break;
1655                                 }
1656
1657                                 nest_depth = get_loop_depth(get_irn_loop(dom));
1658
1659                                 /* do not hoist into loops */
1660                                 if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
1661                                         DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
1662                                         /* not a suitable location */
1663                                         continue;
1664                                 }
1665
1666                                 /* check if operands die */
1667
1668                                 /* check for uses on current path */
1669                                 for (i = 0; i < avail_arity; i++) {
1670                                         ir_node   *pred       = get_irn_n(avail, i);
1671                                         ir_node   *pred_value = identify(pred);
1672
1673                                         if (dom == NULL)
1674                                                 break;
1675
1676                                         DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
1677
1678                                         if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
1679                                                 DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
1680                                                 dom = NULL;
1681                                                 break;
1682                                         }
1683
1684                                         /* check every successor */
1685                                         foreach_out_edge(pred, edge) {
1686                                                 ir_node *succ = get_edge_src_irn(edge);
1687                                                 DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
1688
1689                                                 /* check only successors on current path to end */
1690                                                 if (block_dominates(dom, get_nodes_block(succ))) {
1691                                                         ir_node *succ_value = identify(succ);
1692
1693                                                         /* Do we have another user than avail?
1694                                                            Then predecessor is not dead after removal of avail. */
1695                                                         if (succ_value != value) {
1696                                                                 DB((dbg, LEVEL_4, "still used in %+F\n", succ));
1697                                                                 dom = NULL;
1698                                                                 break;
1699                                                         }
1700                                                 }
1701                                         }
1702                                 }
1703                                 if (dom)
1704                                         new_target = dom;
1705
1706 #if COMMON_DOM
1707                                 /* only try common dominator */
1708                                 break;
1709 #endif
1710                         }
1711
1712                         /* put node into new target block */
1713                         if (new_target) {
1714                                 block_info *target_info = get_block_info(new_target);
1715                                 int         nn_arity    = get_irn_arity(avail);
1716                                 ir_node   **in          = XMALLOCN(ir_node *, nn_arity);
1717                                 ir_node    *nn;
1718                                 int         i;
1719
1720                                 DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
1721                                 DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
1722
1723                                 for (i = 0; i < nn_arity; ++i) {
1724                                         ir_node *pred       = get_irn_n(avail, i);
1725                                         ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
1726                                         assert(avail_pred);
1727                                         in[i] = avail_pred;
1728                                 }
1729                                 nn = new_ir_node(
1730                                         get_irn_dbg_info(avail),
1731                                         environ->graph,
1732                                         new_target,
1733                                         get_irn_op(avail),
1734                                         get_irn_mode(avail),
1735                                         nn_arity,
1736                                         in);
1737                                 free(in);
1738
1739                                 identify_or_remember(nn);
1740                                 /* TODO Nodes are inserted into a dominating block and should
1741                                    be available from this point on. Currently we do not push
1742                                    the availability information through during the walk. */
1743                                 ir_valueset_insert(target_info->new_set, value, nn);
1744                                 ir_valueset_insert(target_info->avail_out, value, nn);
1745                         }
1746                 }
1747         }
1748 }
1749 #endif
1750
1751 /* --------------------------------------------------------
1752  * Elimination of fully redundant nodes
1753  * --------------------------------------------------------
1754  */
1755
1756 /**
1757  * Walker which finds redundant nodes using avail_out sets
1758  * and exchanges them for existing ones.
1759  * We cannot change the graph here as this would affect
1760  * the hash values of the nodes.
1761  *
1762  * @param irn  the node
1763  * @param ctx  the walker environment
1764  */
1765 static void eliminate(ir_node *irn, void *ctx)
1766 {
1767         pre_env *env = (pre_env*)ctx;
1768
1769         if (! is_Block(irn)) {
1770                 ir_node    *block = get_nodes_block(irn);
1771                 block_info *info  = get_block_info(block);
1772                 ir_node    *value = identify(irn);
1773
1774                 if (value != NULL) {
1775                         ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
1776                         DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
1777
1778                         if (expr != NULL && expr != irn) {
1779                                 elim_pair *p = OALLOC(&env->obst, elim_pair);
1780
1781                                 p->old_node = irn;
1782                                 p->new_node = expr;
1783                                 p->next     = env->pairs;
1784                                 if (get_irn_idx(expr) > env->last_idx)
1785                                         p->reason = FS_OPT_GVN_PARTLY;
1786                                 else
1787                                         p->reason = FS_OPT_GVN_FULLY;
1788                                 env->pairs = p;
1789                                 DEBUG_ONLY(inc_stats(gvnpre_stats->replaced);)
1790                         }
1791                 }
1792         }
1793 }
1794
1795 /**
1796  * Do all the recorded changes and optimize
1797  * newly created Phi's.
1798  *
1799  * @param pairs  list of elimination pairs
1800  */
1801 static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
1802 {
1803         elim_pair             *p;
1804         ir_node               *end    = environ->end_node;
1805
1806         for (p = pairs; p != NULL; p = p->next) {
1807                 /* might be already changed */
1808                 p->new_node = skip_Id(p->new_node);
1809
1810                 DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
1811
1812                 /* PRE tends to create Phi(self, self, ... , x, self, self, ...)
1813                  * which we can optimize here */
1814                 if (is_Phi(p->new_node)) {
1815                         int      i;
1816                         ir_node *res = NULL;
1817
1818                         for (i = get_irn_arity(p->new_node) - 1; i >= 0; --i) {
1819                                 ir_node *pred = get_irn_n(p->new_node, i);
1820
1821                                 if (pred != p->old_node) {
1822                                         if (res) {
1823                                                 res = NULL;
1824                                                 break;
1825                                         }
1826                                         res = pred;
1827                                 }
1828                         }
1829                         if (res) {
1830                                 exchange(p->new_node, res);
1831                                 p->new_node = res;
1832                         }
1833                 }
1834                 DBG_OPT_GVN_PRE(p->old_node, p->new_node, p->reason);
1835
1836                 exchange(p->old_node, p->new_node);
1837         }
1838
1839         /* remove keep alive edges of unused mode_M phis */
1840         foreach_ir_nodeset(keeps, m_phi, iter) {
1841                 remove_End_keepalive(end, m_phi);
1842         }
1843 }
1844
1845
1846 /* --------------------------------------------------------
1847  * GVN PRE pass
1848  * --------------------------------------------------------
1849  */
1850
1851 /**
1852  * Gvn_Pre algorithm.
1853  *
1854  * @param irg   the graph
1855  * @param env   the environment
1856  */
1857 static void gvn_pre(ir_graph *irg, pre_env *env)
1858 {
1859         unsigned              antic_iter;
1860         unsigned              insert_iter;
1861
1862         DB((dbg, LEVEL_1, "Doing GVN-PRE for %+F\n", irg));
1863
1864         /* allocate block info */
1865         irg_walk_blkwise_graph(irg, block_info_walker, NULL, env);
1866
1867         ir_nodehashmap_init(&value_map);
1868
1869         /* generate exp_gen */
1870         irg_walk_blkwise_graph(irg, NULL, topo_walker, env);
1871         dump_all_expgen_sets(env->list);
1872
1873         /* compute the avail_out sets for all blocks */
1874         dom_tree_walk_irg(irg, compute_avail_top_down, NULL, env);
1875
1876         /* compute the anticipated value sets for all blocks */
1877         antic_iter      = 0;
1878         env->first_iter = 1;
1879
1880         env->iteration = 1;
1881         /* antic_in passes */
1882         do {
1883                 ++antic_iter;
1884                 DB((dbg, LEVEL_2, "= Antic_in Iteration %d ========================\n", antic_iter));
1885                 env->changes = 0;
1886                 irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
1887                 env->first_iter = 0;
1888                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1889                 env->iteration ++;
1890         } while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
1891
1892         DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
1893
1894         ir_nodeset_init(env->keeps);
1895         insert_iter       = 0;
1896         env->first_iter   = 1;
1897         /* compute redundant expressions */
1898         do {
1899                 ++insert_iter;
1900                 DB((dbg, LEVEL_2, "= Insert Iteration %d ==========================\n", insert_iter));
1901                 env->changes = 0;
1902                 /* TODO topologically top down would be better; fewer iterations. */
1903                 dom_tree_walk_irg(irg, insert_nodes_walker, NULL, env);
1904                 env->first_iter = 0;
1905                 DB((dbg, LEVEL_2, "----------------------------------------------\n"));
1906         } while (env->changes != 0 && insert_iter < MAX_INSERT_ITER);
1907         DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
1908
1909 #if HOIST_HIGH
1910         /* An attempt to reduce lifetimes by hoisting already hoisted values
1911            even higher if their operands die. */
1912         dom_tree_walk_irg(irg, hoist_high, NULL, NULL);
1913         /* update avail_out for elimination */
1914         dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
1915 #endif
1916
1917         /* Deactivate edges to prevent intelligent removal of nodes,
1918            or else we will get deleted nodes which we try to exchange. */
1919         edges_deactivate(environ->graph);
1920
1921         /* eliminate nodes */
1922         irg_walk_graph(irg, NULL, eliminate, env);
1923         eliminate_nodes(env->pairs, env->keeps);
1924
1925         ir_nodeset_destroy(env->keeps);
1926 }
1927
1928 /**
1929  * Gvn_Pre pass for graph irg.
1930  *
1931  * @param irg   the graph
1932  */
1933 void do_gvn_pre(ir_graph *irg)
1934 {
1935         pre_env               env;
1936         ir_nodeset_t          keeps;
1937         optimization_state_t  state;
1938         block_info           *block_info;
1939
1940         /* bads and unreachables cause too much trouble with dominance,
1941            loop info for endless loop detection,
1942            no critical edges is PRE precondition
1943          */
1944         assure_irg_properties(irg,
1945                 IR_GRAPH_PROPERTY_NO_BADS
1946                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
1947                 | IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
1948                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
1949                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
1950                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
1951
1952         /* register a debug mask */
1953         FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
1954
1955         save_optimization_state(&state);
1956         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
1957
1958         edges_activate(irg);
1959
1960         environ = &env;
1961         DEBUG_ONLY(init_stats();)
1962
1963         /* setup environment */
1964         env.graph        = irg;
1965         env.list         = NULL;
1966         env.start_block  = get_irg_start_block(irg);
1967         env.end_block    = get_irg_end_block(irg);
1968         env.end_node     = get_irg_end(irg);
1969         env.pairs        = NULL;
1970         env.keeps        = &keeps;
1971         env.last_idx     = get_irg_last_idx(irg);
1972         obstack_init(&env.obst);
1973
1974         /* Detect and set links of infinite loops to non-zero. */
1975         analyse_loops(irg);
1976
1977 #if OPTIMIZE_NODES
1978         new_identities(irg);
1979         env.value_table = irg->value_table;
1980         irg->value_table = NULL;
1981 #endif
1982
1983         /* Switch on GCSE. We need it to correctly compute
1984            the value of a node, which is independent from
1985            its block. */
1986         set_opt_global_cse(1);
1987         /* new_identities() */
1988         if (irg->value_table != NULL)
1989                 del_pset(irg->value_table);
1990         /* initially assumed nodes in pset are 512 */
1991         irg->value_table = new_pset(compare_gvn_identities, 512);
1992 #if OPTIMIZE_NODES
1993         env.gvnpre_values = irg->value_table;
1994 #endif
1995
1996         /* do GVN-PRE pass */
1997         gvn_pre(irg, &env);
1998         DEBUG_ONLY(print_stats();)
1999
2000         /* clean up: delete all sets */
2001         for (block_info = env.list; block_info != NULL; block_info = block_info->next) {
2002                 free_block_info(block_info);
2003         }
2004
2005         DEBUG_ONLY(free_stats();)
2006         ir_nodehashmap_destroy(&value_map);
2007         obstack_free(&env.obst, NULL);
2008         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
2009
2010         /* Pin the graph again.
2011            This is needed due to the use of set_opt_global_cse(1) */
2012         set_irg_pinned(irg, op_pin_state_pinned);
2013         restore_optimization_state(&state);
2014         confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
2015
2016 #if OPTIMIZE_NODES
2017         irg->value_table = env.value_table;
2018         del_pset(irg->value_table);
2019         irg->value_table = env.gvnpre_values;
2020 #endif
2021
2022         /* TODO There seem to be optimizations that try to use the existing
2023            value_table. */
2024         new_identities(irg);
2025
2026         /* TODO assure nothing else breaks. */
2027         set_opt_global_cse(0);
2028         edges_activate(irg);
2029 }
2030
2031 /* Creates an ir_graph pass for do_gvn_pre. */
2032 ir_graph_pass_t *do_gvn_pre_pass(const char *name)
2033 {
2034         return def_graph_pass(name ? name : "gvn_pre", do_gvn_pre);
2035 }