- BugFix: initialize memop->mem
[libfirm] / ir / opt / opt_ldst.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Dataflow driven Load/Store optimizations, uses some ideas from
23  *          VanDrunen's LEPRE
24  * @author  Michael Beck
25  * @version $Id$
26  */
27 #include "config.h"
28
29 #include "irnode_t.h"
30 #include "irflag_t.h"
31 #include "array_t.h"
32 #include "ircons.h"
33 #include "irdom.h"
34 #include "irgmod.h"
35 #include "irgwalk.h"
36 #include "irouts.h"
37 #include "irgraph.h"
38 #include "irgopt.h"
39 #include "iropt.h"
40 #include "iroptimize.h"
41 #include "irnodemap.h"
42 #include "raw_bitset.h"
43 #include "debug.h"
44 #include "error.h"
45
46 /* maximum number of output Proj's */
47 #define MAX_PROJ (pn_Load_max > pn_Store_max ? pn_Load_max : pn_Store_max)
48
49 /**
50  * Maps an address to a dense ID.
51  */
52 typedef struct address_entry_t {
53         unsigned id;          /**< The ID */
54 } address_entry;
55
56 /**
57  * Memop-flags.
58  */
59 enum memop_flags {
60         FLAG_KILL_LOADS  =  1, /**< KILL all Loads */
61         FLAG_KILL_STORES =  2, /**< KILL all Stores */
62         FLAG_KILLED_NODE =  4, /**< this node was killed */
63         FLAG_EXCEPTION   =  8, /**< this node has exception flow */
64         FLAG_IGNORE      = 16, /**< ignore this node (volatile or other) */
65         /** this memop KILLS all addresses */
66         FLAG_KILL_ALL    = FLAG_KILL_LOADS|FLAG_KILL_STORES
67 };
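
/* Example (mirrors update_Call_memop() further below): a pure Call is tagged
 * FLAG_KILL_STORES only, while any other non-const Call gets FLAG_KILL_ALL,
 * i.e. FLAG_KILL_LOADS|FLAG_KILL_STORES. */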
68
69 /**
70  * A value: This represents a value stored at a given address in
71  * memory. Do not confuse with values from value numbering.
72  */
73 typedef struct value_t value_t;
74 struct value_t {
75         ir_node  *address;    /**< the address of this value */
76         ir_node  *value;      /**< the value itself */
77         ir_mode  *mode;       /**< the mode of the value */
78         unsigned id;          /**< address id */
79 };
80
81 /**
82  * A memop describes a memory-related operation.
83  * These are Loads/Stores and all other ops that might modify
84  * memory (Calls, CopyB) or cause exceptions.
85  */
86 typedef struct memop_t memop_t;
87 struct memop_t {
88         value_t  value;      /**< the value of this memop: only defined for Load/Store */
89         ir_node  *node;      /**< the memory op itself */
90         ir_node  *mem;       /**< the memory FROM this node */
91         ir_node  *replace;   /**< the replacement node if this memop is replaced */
92         memop_t  *next;      /**< links to the next memory op in the block in forward order. */
93         memop_t  *prev;      /**< links to the previous memory op in the block in forward order. */
94         unsigned flags;      /**< memop flags */
95         ir_node  *projs[MAX_PROJ]; /**< Projs of this memory op */
96 };
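
/* Note on initialization: alloc_memop() below clears mem, replace, next, flags
 * and the projs; mem and the projs are then filled in by the update_*_memop()
 * functions, while prev is set later by collect_backward(). */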
97
98 /**
99  * Additional data for every basic block.
100  */
101 typedef struct block_t block_t;
102 struct block_t {
103         memop_t  *memop_forward;     /**< topologically sorted list of memory ops in this block */
104         memop_t  *memop_backward;    /**< last memop in the list */
105         unsigned *avail_out;         /**< out-set of available addresses */
106         memop_t  **id_2_memop_avail; /**< maps avail address ids to memops */
107         unsigned *anticL_in;         /**< in-set of anticipated Load addresses */
108         memop_t  **id_2_memop_antic; /**< maps anticipated address ids to memops */
109         ir_node  *block;             /**< the associated block */
110         block_t  *forward_next;      /**< next block entry for forward iteration */
111         block_t  *backward_next;     /**< next block entry for backward iteration */
112         memop_t  *avail;             /**< used locally for the avail map */
113 };
114
115 /**
116  * Metadata for this pass.
117  */
118 typedef struct ldst_env_t {
119         struct obstack  obst;              /**< obstack for temporary data */
120         ir_nodemap_t    adr_map;           /**< maps addresses to address_entry structures */
121         block_t         *forward;          /**< Inverse post-order list of all blocks Start->End */
122         block_t         *backward;         /**< Inverse post-order list of all blocks End->Start */
123         ir_node         *start_bl;         /**< start block of the current graph */
124         ir_node         *end_bl;           /**< end block of the current graph */
125         unsigned        *curr_set;         /**< current set of addresses */
126         memop_t         **curr_id_2_memop; /**< current map of address ids to memops */
127         unsigned        curr_adr_id;       /**< number for address mapping */
128         unsigned        n_mem_ops;         /**< number of memory operations (Loads/Stores) */
129         unsigned        rbs_size;          /**< size of all bitsets in bits */
130         int             changed;           /**< Flags for changed graph state */
131 } ldst_env;
132
133 /* the one and only environment */
134 static ldst_env env;
135
136 #ifdef DEBUG_libfirm
137
138 static firm_dbg_module_t *dbg;
139
140 /**
141  * Dumps the block list.
142  *
143  * @param env  the ldst environment
144  */
145 static void dump_block_list(ldst_env *env) {
146         block_t *entry;
147         memop_t *op;
148         int     i;
149
150         for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
151                 DB((dbg, LEVEL_2, "%+F {", entry->block));
152
153                 i = 0;
154                 for (op = entry->memop_forward; op != NULL; op = op->next) {
155                         if (i == 0) {
156                                 DB((dbg, LEVEL_2, "\n\t"));
157                         }
                        DB((dbg, LEVEL_2, "%+F", op->node));
158                         if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
159                                 DB((dbg, LEVEL_2, "X"));
160                         else if (op->flags & FLAG_KILL_LOADS)
161                                 DB((dbg, LEVEL_2, "L"));
162                         else if (op->flags & FLAG_KILL_STORES)
163                                 DB((dbg, LEVEL_2, "S"));
164                         DB((dbg, LEVEL_2, ", "));
165
166                         i = (i + 1) & 3;
167                 }
168                 DB((dbg, LEVEL_2, "\n}\n\n"));
169         }
170 }
171
172 /**
173  * Dumps the current set.
174  *
175  * @param bl   current block
176  * @param s    name of the set
177  */
178 static void dump_curr(block_t *bl, const char *s) {
179         unsigned pos = 0;
180         unsigned end = env.n_mem_ops * 2 - 1;
181         int      i;
182
183         DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
184         i = 0;
185         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
186                 memop_t *op = env.curr_id_2_memop[pos];
187
188                 if (i == 0) {
189                         DB((dbg, LEVEL_2, "\n\t"));
190                 }
191                 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
192                 i = (i + 1) & 3;
193         }
194         DB((dbg, LEVEL_2, "\n}\n"));
195 }
196
197 #else
198 #define dump_block_list(ldst)
199 #define dump_curr(bl, s)
200 #endif /* DEBUG_libfirm */
201
202 /** Get the block entry for a block node */
203 static block_t *get_block_entry(const ir_node *block) {
204         assert(is_Block(block));
205
206         return get_irn_link(block);
207 }
208
209 /** Get the memop entry for a memory operation node */
210 static memop_t *get_irn_memop(const ir_node *irn) {
211         assert(! is_Block(irn));
212         return get_irn_link(irn);
213 }
214
215 /**
216  * Walk over the memory edges from definition to users.
217  * This ensures that even operations without a memory output are found.
218  *
219  * @param irn   start node
220  * @param pre   pre walker function
221  * @param post  post walker function
222  * @param ctx   context parameter for the walker functions
223  */
224 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
225         int     i;
226         ir_mode *mode;
227
228         mark_irn_visited(irn);
229
230         if (pre)
231                 pre(irn, ctx);
232
233         mode = get_irn_mode(irn);
234         if (mode == mode_M) {
235                 /* every successor uses memory */
236                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
237                         ir_node *succ = get_irn_out(irn, i);
238
239                         if (! irn_visited(succ))
240                                 walk_memory(succ, pre, post, ctx);
241                 }
242         } else if (mode == mode_T) {
243                 /* only some Projs use memory */
244                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
245                         ir_node *proj = get_irn_out(irn, i);
246
247                         if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
248                                 walk_memory(proj, pre, post, ctx);
249                 }
250         }
251         if (post)
252                 post(irn, ctx);
253 }
254
255 /**
256  * Walks over all memory nodes of a graph.
257  *
258  * @param irg   a graph
259  * @param pre   pre walker function
260  * @param post  post walker function
261  * @param ctx   context parameter for the walker functions
262  */
263 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
264         inc_irg_visited(irg);
265
266         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
267
268         /*
269          * there are two possible sources for memory: initial_mem and nomem
270          * we ignore nomem as this should NOT change the memory
271          */
272         walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
273
274         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
275 }
276
277 /**
278  * Block walker: allocate a block entry for every block.
279  */
280 static void prepare_blocks(ir_node *block, void *ctx) {
281         block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
282
283         (void) ctx;
284
285         entry->memop_forward    = NULL;
286         entry->memop_backward   = NULL;
287         entry->avail_out        = NULL;
288         entry->id_2_memop_avail = NULL;
289         entry->anticL_in        = NULL;
290         entry->id_2_memop_antic = NULL;
291         entry->block            = block;
292         entry->forward_next     = env.forward;
293         entry->backward_next    = NULL;
294         entry->avail            = NULL;
295         set_irn_link(block, entry);
296
297         /* create the list in inverse order */
298         env.forward  = entry;
299         /* temporarily remember the last one */
300         env.backward = entry;
301 }
302
303 /**
304  * Block walker: create backward links for the memops of a block.
305  */
306 static void collect_backward(ir_node *block, void *ctx) {
307         block_t *entry = get_block_entry(block);
308         memop_t *last, *op;
309
310         (void)ctx;
311
312         /*
313          * Do NOT link in the end block yet. We want it to be
314          * the first in the list. This is NOT guaranteed by the walker
315          * if we have endless loops.
316          */
317         if (block != env.end_bl) {
318                 entry->backward_next = env.backward;
319
320                 /* create the list in inverse order */
321                 env.backward = entry;
322         }
323
324         /* create backward links for all memory ops */
325         last = NULL;
326         for (op = entry->memop_forward; op != NULL; op = op->next) {
327                 op->prev = last;
328                 last     = op;
329         }
330         entry->memop_backward = last;
331 }
332
333 /**
334  * Allocate a memop.
335  *
336  * @param irn  the IR-node representing the memop
337  */
338 static memop_t *alloc_memop(ir_node *irn) {
339         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
340
341         m->value.address = NULL;
342         m->value.value   = NULL;
343         m->value.mode    = NULL;
344
345         m->node          = irn;
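	/* the initialization named in the commit message: ops whose
	 * update_*_memop() never finds a memory Proj would otherwise
	 * leave mem undefined */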
346         m->mem           = NULL;
347         m->replace       = NULL;
348         m->next          = NULL;
349         m->flags         = 0;
350
351         memset(m->projs, 0, sizeof(m->projs));
352
353         set_irn_link(irn, m);
354         return m;
355 }
356
357 /**
358  * Create a memop for a Phi-replacement.
359  *
360  * @param op   the memop to clone
361  * @param phi  the Phi-node representing the new value
362  */
363 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
364         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
365
366         m->value         = op->value;
367         m->value.value   = phi;
368
369         m->node          = phi;
370         m->replace       = NULL;
371         m->next          = NULL;
372         m->flags         = 0;
373
374         set_irn_link(phi, m);
375         return m;
376 }
377
378 /**
379  * Register an address and allocate an ID for it.
380  *
381  * @param adr  the IR-node representing the address
382  */
383 static unsigned register_address(ir_node *adr) {
384         address_entry *entry;
385
386         /* skip Confirms and Casts */
387 restart:
388         if (is_Confirm(adr)) {
389                 adr = get_Confirm_value(adr);
390                 goto restart;
391         }
392         if (is_Cast(adr)) {
393                 adr = get_Cast_op(adr);
394                 goto restart;
395         }
396
397         entry = ir_nodemap_get(&env.adr_map, adr);
398
399         if (entry == NULL) {
400                 /* new address */
401                 entry = obstack_alloc(&env.obst, sizeof(*entry));
402
403                 entry->id = env.curr_adr_id++;
404                 ir_nodemap_insert(&env.adr_map, adr, entry);
405
406                 DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
407         }
408         return entry->id;
409 }
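
/* Illustrative sketch (mirrors add_memop() further below): the IDs returned
 * here are dense, so they can directly index the rbitsets and the id_2_memop
 * arrays:
 *
 *     unsigned id = register_address(get_Load_ptr(load));
 *     rbitset_set(env.curr_set, id);
 *     env.curr_id_2_memop[id] = op;
 */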
410
411 /**
412  * Return the memory properties of a call node.
413  *
414  * @param call  the call node
415  *
416  * @return a bitset of mtp_property_const and mtp_property_pure
417  */
418 static unsigned get_Call_memory_properties(ir_node *call) {
419         ir_type *call_tp = get_Call_type(call);
420         unsigned prop = get_method_additional_properties(call_tp);
421
422         /* check first the call type */
423         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
424                 /* try the called entity */
425                 ir_node *ptr = get_Call_ptr(call);
426
427                 if (is_Global(ptr)) {
428                         ir_entity *ent = get_Global_entity(ptr);
429
430                         prop = get_entity_additional_properties(ent);
431                 }
432         }
433         return prop & (mtp_property_const|mtp_property_pure);
434 }
435
436 /**
437  * Returns an entity if the address ptr points to a constant one.
438  *
439  * @param ptr  the address
440  *
441  * @return an entity or NULL
442  */
443 static ir_entity *find_constant_entity(ir_node *ptr)
444 {
445         for (;;) {
446                 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
447                         return get_SymConst_entity(ptr);
448                 } else if (is_Sel(ptr)) {
449                         ir_entity *ent = get_Sel_entity(ptr);
450                         ir_type   *tp  = get_entity_owner(ent);
451
452                         /* Do not fiddle with polymorphism. */
453                         if (is_Class_type(get_entity_owner(ent)) &&
454                                 ((get_entity_n_overwrites(ent)    != 0) ||
455                                 (get_entity_n_overwrittenby(ent) != 0)   ) )
456                                 return NULL;
457
458                         if (is_Array_type(tp)) {
459                                 /* check bounds */
460                                 int i, n;
461
462                                 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
463                                         ir_node *bound;
464                                         tarval *tlower, *tupper;
465                                         ir_node *index = get_Sel_index(ptr, i);
466                                         tarval *tv     = computed_value(index);
467
468                                         /* check if the index is constant */
469                                         if (tv == tarval_bad)
470                                                 return NULL;
471
472                                         bound  = get_array_lower_bound(tp, i);
473                                         tlower = computed_value(bound);
474                                         bound  = get_array_upper_bound(tp, i);
475                                         tupper = computed_value(bound);
476
477                                         if (tlower == tarval_bad || tupper == tarval_bad)
478                                                 return NULL;
479
480                                         if (tarval_cmp(tv, tlower) & pn_Cmp_Lt)
481                                                 return NULL;
482                                         if (tarval_cmp(tupper, tv) & pn_Cmp_Lt)
483                                                 return NULL;
484
485                                         /* ok, bounds check finished */
486                                 }
487                         }
488
489                         if (variability_constant == get_entity_variability(ent))
490                                 return ent;
491
492                         /* try next */
493                         ptr = get_Sel_ptr(ptr);
494                 } else if (is_Add(ptr)) {
495                         ir_node *l = get_Add_left(ptr);
496                         ir_node *r = get_Add_right(ptr);
497
498                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
499                                 ptr = l;
500                         else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
501                                 ptr = r;
502                         else
503                                 return NULL;
504
505                         /* for now, we support only one addition, reassoc should fold all others */
506                         if (! is_SymConst(ptr) && !is_Sel(ptr))
507                                 return NULL;
508                 } else if (is_Sub(ptr)) {
509                         ir_node *l = get_Sub_left(ptr);
510                         ir_node *r = get_Sub_right(ptr);
511
512                         if (get_irn_mode(l) == get_irn_mode(ptr) &&     is_Const(r))
513                                 ptr = l;
514                         else
515                                 return NULL;
516                         /* for now, we support only one subtraction, reassoc should fold all others */
517                         if (! is_SymConst(ptr) && !is_Sel(ptr))
518                                 return NULL;
519                 } else
520                         return NULL;
521         }
522 }  /* find_constant_entity */
523
524 /**
525  * Return the array index of the Sel node n for dimension dim as a long.
526  */
527 static long get_Sel_array_index_long(ir_node *n, int dim) {
528         ir_node *index = get_Sel_index(n, dim);
529         assert(is_Const(index));
530         return get_tarval_long(get_Const_tarval(index));
531 }  /* get_Sel_array_index_long */
532
533 /**
534  * Returns the accessed component graph path for a
535  * node computing an address.
536  *
537  * @param ptr    the node computing the address
538  * @param depth  current depth in steps upward from the root
539  *               of the address
540  */
541 static compound_graph_path *rec_get_accessed_path(ir_node *ptr, int depth) {
542         compound_graph_path *res = NULL;
543         ir_entity           *root, *field, *ent;
544         int                 path_len, pos, idx;
545         tarval              *tv;
546         ir_type             *tp;
547
548         if (is_SymConst(ptr)) {
549                 /* a SymConst. If the depth is 0, this is an access to a global
550                  * entity and we don't need a component path, else we know
551                  * at least its length.
552                  */
553                 assert(get_SymConst_kind(ptr) == symconst_addr_ent);
554                 root = get_SymConst_entity(ptr);
555                 res = (depth == 0) ? NULL : new_compound_graph_path(get_entity_type(root), depth);
556         } else if (is_Sel(ptr)) {
557                 /* it's a Sel, go up until we find the root */
558                 res = rec_get_accessed_path(get_Sel_ptr(ptr), depth+1);
559                 if (res == NULL)
560                         return NULL;
561
562                 /* fill up the step in the path at the current position */
563                 field    = get_Sel_entity(ptr);
564                 path_len = get_compound_graph_path_length(res);
565                 pos      = path_len - depth - 1;
566                 set_compound_graph_path_node(res, pos, field);
567
568                 if (is_Array_type(get_entity_owner(field))) {
569                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
570                         set_compound_graph_path_array_index(res, pos, get_Sel_array_index_long(ptr, 0));
571                 }
572         } else if (is_Add(ptr)) {
573                 ir_node *l    = get_Add_left(ptr);
574                 ir_node *r    = get_Add_right(ptr);
575                 ir_mode *mode = get_irn_mode(ptr);
576                 tarval  *tmp;
577
578                 if (is_Const(r) && get_irn_mode(l) == mode) {
579                         ptr = l;
580                         tv  = get_Const_tarval(r);
581                 } else {
582                         ptr = r;
583                         tv  = get_Const_tarval(l);
584                 }
585 ptr_arith:
586                 mode = get_tarval_mode(tv);
587                 tmp  = tv;
588
589                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
590                 if (is_Sel(ptr)) {
591                         field = get_Sel_entity(ptr);
592                 } else {
593                         field = get_SymConst_entity(ptr);
594                 }
595                 idx = 0;
596                 for (ent = field;;) {
597                         unsigned size;
598                         tarval   *sz, *tv_index, *tlower, *tupper;
599                         ir_node  *bound;
600
601                         tp = get_entity_type(ent);
602                         if (! is_Array_type(tp))
603                                 break;
604                         ent = get_array_element_entity(tp);
605                         size = get_type_size_bytes(get_entity_type(ent));
606                         sz   = new_tarval_from_long(size, mode);
607
608                         tv_index = tarval_div(tmp, sz);
609                         tmp      = tarval_mod(tmp, sz);
610
611                         if (tv_index == tarval_bad || tmp == tarval_bad)
612                                 return NULL;
613
614                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
615                         bound  = get_array_lower_bound(tp, 0);
616                         tlower = computed_value(bound);
617                         bound  = get_array_upper_bound(tp, 0);
618                         tupper = computed_value(bound);
619
620                         if (tlower == tarval_bad || tupper == tarval_bad)
621                                 return NULL;
622
623                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
624                                 return NULL;
625                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
626                                 return NULL;
627
628                         /* ok, bounds check finished */
629                         ++idx;
630                 }
631                 if (! tarval_is_null(tmp)) {
632                         /* access to some struct/union member */
633                         return NULL;
634                 }
635
636                 /* should be at least ONE array */
637                 if (idx == 0)
638                         return NULL;
639
640                 res = rec_get_accessed_path(ptr, depth + idx);
641                 if (res == NULL)
642                         return NULL;
643
644                 path_len = get_compound_graph_path_length(res);
645                 pos      = path_len - depth - idx;
646
647                 for (ent = field;;) {
648                         unsigned size;
649                         tarval   *sz, *tv_index;
650                         long     index;
651
652                         tp = get_entity_type(ent);
653                         if (! is_Array_type(tp))
654                                 break;
655                         ent = get_array_element_entity(tp);
656                         set_compound_graph_path_node(res, pos, ent);
657
658                         size = get_type_size_bytes(get_entity_type(ent));
659                         sz   = new_tarval_from_long(size, mode);
660
661                         tv_index = tarval_div(tv, sz);
662                         tv       = tarval_mod(tv, sz);
663
664                         /* worked above, should work again */
665                         assert(tv_index != tarval_bad && tv != tarval_bad);
666
667                         /* bounds already checked above */
668                         index = get_tarval_long(tv_index);
669                         set_compound_graph_path_array_index(res, pos, index);
670                         ++pos;
671                 }
672         } else if (is_Sub(ptr)) {
673                 ir_node *l = get_Sub_left(ptr);
674                 ir_node *r = get_Sub_right(ptr);
675
676                 ptr = l;
677                 tv  = get_Const_tarval(r);
678                 tv  = tarval_neg(tv);
679                 goto ptr_arith;
680         }
681         return res;
682 }  /* rec_get_accessed_path */
683
684 /**
685  * Returns an access path or NULL.  The access path is only
686  * valid if the graph is in phase_high and _no_ address computation is used.
687  */
688 static compound_graph_path *get_accessed_path(ir_node *ptr) {
689         compound_graph_path *gr = rec_get_accessed_path(ptr, 0);
690         return gr;
691 }  /* get_accessed_path */
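
/* Example of the intended effect (a sketch, the names are made up): for
 *
 *     static const int tab[4] = { 1, 2, 3, 5 };
 *     ... x = tab[2];
 *
 * the Sel/SymConst chain computing &tab[2] yields a compound graph path into
 * tab's initializer, and update_Load_memop() below can then replace the Load
 * by the constant 3. */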
692
693 typedef struct path_entry {
694         ir_entity         *ent;
695         struct path_entry *next;
696         long              index;
697 } path_entry;
698
699 static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next) {
700         path_entry       entry, *p;
701         ir_entity        *ent, *field;
702         ir_initializer_t *initializer;
703         tarval           *tv;
704         ir_type          *tp;
705         unsigned         n;
706
707         entry.next = next;
708         if (is_SymConst(ptr)) {
709                 /* found the root */
710                 ent         = get_SymConst_entity(ptr);
711                 initializer = get_entity_initializer(ent);
712                 for (p = next; p != NULL;) {
713                         if (initializer->kind != IR_INITIALIZER_COMPOUND)
714                                 return NULL;
715                         n  = get_initializer_compound_n_entries(initializer);
716                         tp = get_entity_type(ent);
717
718                         if (is_Array_type(tp)) {
719                                 ent = get_array_element_entity(tp);
720                                 if (ent != p->ent) {
721                                         /* a missing [0] */
722                                         if (0 >= n)
723                                                 return NULL;
724                                         initializer = get_initializer_compound_value(initializer, 0);
725                                         continue;
726                                 }
727                         }
728                         if (p->index >= (int) n)
729                                 return NULL;
730                         initializer = get_initializer_compound_value(initializer, p->index);
731
732                         ent = p->ent;
733                         p   = p->next;
734                 }
735                 tp = get_entity_type(ent);
736                 while (is_Array_type(tp)) {
737                         ent = get_array_element_entity(tp);
738                         tp = get_entity_type(ent);
739                         /* a missing [0] */
740                         n  = get_initializer_compound_n_entries(initializer);
741                         if (0 >= n)
742                                 return NULL;
743                         initializer = get_initializer_compound_value(initializer, 0);
744                 }
745
746                 switch (initializer->kind) {
747                 case IR_INITIALIZER_CONST:
748                         return get_initializer_const_value(initializer);
749                 case IR_INITIALIZER_TARVAL:
750                 case IR_INITIALIZER_NULL:
751                 default:
752                         return NULL;
753                 }
754         } else if (is_Sel(ptr)) {
755                 entry.ent = field = get_Sel_entity(ptr);
756                 tp = get_entity_owner(field);
757                 if (is_Array_type(tp)) {
758                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
759                         entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
760                 } else {
761                         int i, n_members = get_compound_n_members(tp);
762                         for (i = 0; i < n_members; ++i) {
763                                 if (get_compound_member(tp, i) == field)
764                                         break;
765                         }
766                         if (i >= n_members) {
767                                 /* not found: should NOT happen */
768                                 return NULL;
769                         }
770                         entry.index = i;
771                 }
772                 return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
773         }  else if (is_Add(ptr)) {
774                 ir_node  *l = get_Add_left(ptr);
775                 ir_node  *r = get_Add_right(ptr);
776                 ir_mode  *mode;
777                 unsigned pos;
778
779                 if (is_Const(r)) {
780                         ptr = l;
781                         tv  = get_Const_tarval(r);
782                 } else {
783                         ptr = r;
784                         tv  = get_Const_tarval(l);
785                 }
786 ptr_arith:
787                 mode = get_tarval_mode(tv);
788
789                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
790                 if (is_Sel(ptr)) {
791                         field = get_Sel_entity(ptr);
792                 } else {
793                         field = get_SymConst_entity(ptr);
794                 }
795
796                 /* count needed entries */
797                 pos = 0;
798                 for (ent = field;;) {
799                         tp = get_entity_type(ent);
800                         if (! is_Array_type(tp))
801                                 break;
802                         ent = get_array_element_entity(tp);
803                         ++pos;
804                 }
805                 /* should be at least ONE entry */
806                 if (pos == 0)
807                         return NULL;
808
809                 /* allocate the right number of entries */
810                 NEW_ARR_A(path_entry, p, pos);
811
812                 /* fill them up */
813                 pos = 0;
814                 for (ent = field;;) {
815                         unsigned size;
816                         tarval   *sz, *tv_index, *tlower, *tupper;
817                         long     index;
818                         ir_node  *bound;
819
820                         tp = get_entity_type(ent);
821                         if (! is_Array_type(tp))
822                                 break;
823                         ent = get_array_element_entity(tp);
824                         p[pos].ent  = ent;
825                         p[pos].next = &p[pos + 1];
826
827                         size = get_type_size_bytes(get_entity_type(ent));
828                         sz   = new_tarval_from_long(size, mode);
829
830                         tv_index = tarval_div(tv, sz);
831                         tv       = tarval_mod(tv, sz);
832
833                         if (tv_index == tarval_bad || tv == tarval_bad)
834                                 return NULL;
835
836                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
837                         bound  = get_array_lower_bound(tp, 0);
838                         tlower = computed_value(bound);
839                         bound  = get_array_upper_bound(tp, 0);
840                         tupper = computed_value(bound);
841
842                         if (tlower == tarval_bad || tupper == tarval_bad)
843                                 return NULL;
844
845                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
846                                 return NULL;
847                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
848                                 return NULL;
849
850                         /* ok, bounds check finished */
851                         index = get_tarval_long(tv_index);
852                         p[pos].index = index;
853                         ++pos;
854                 }
855                 if (! tarval_is_null(tv)) {
856                         /* hmm, wrong access */
857                         return NULL;
858                 }
859                 p[pos - 1].next = next;
860                 return rec_find_compound_ent_value(ptr, p);
861         } else if (is_Sub(ptr)) {
862                 ir_node *l = get_Sub_left(ptr);
863                 ir_node *r = get_Sub_right(ptr);
864
865                 ptr = l;
866                 tv  = get_Const_tarval(r);
867                 tv  = tarval_neg(tv);
868                 goto ptr_arith;
869         }
870         return NULL;
871 }  /* rec_find_compound_ent_value */
872
873 static ir_node *find_compound_ent_value(ir_node *ptr) {
874         return rec_find_compound_ent_value(ptr, NULL);
875 }  /* find_compound_ent_value */
876
877 /**
878  * Mark a Load memop to be replaced by the definition def.
879  *
880  * @param op  the Load memop
881  */
882 static void mark_replace_load(memop_t *op, ir_node *def) {
883         op->replace = def;
884         op->flags |= FLAG_KILLED_NODE;
885         env.changed = 1;
886 }  /* mark_replace_load */
887
888 /**
889  * Mark a Store memop to be removed.
890  *
891  * @param op  the Store memop
892  */
893 static void mark_remove_store(memop_t *op) {
894         op->flags |= FLAG_KILLED_NODE;
895         env.changed = 1;
896 }  /* mark_remove_store */
897
898 /**
899  * Update a memop for a Load.
900  *
901  * @param m  the memop
902  */
903 static void update_Load_memop(memop_t *m) {
904         int       i;
905         ir_node   *load = m->node;
906         ir_node   *ptr;
907         ir_entity *ent;
908
909         if (get_Load_volatility(load) == volatility_is_volatile)
910                 m->flags |= FLAG_IGNORE;
911
912         ptr = get_Load_ptr(load);
913
914         m->value.address = ptr;
915
916         for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
917                 ir_node *proj = get_irn_out(load, i);
918                 long    pn;
919
920                 /* beware of keep edges */
921                 if (is_End(proj))
922                         continue;
923
924                 pn = get_Proj_proj(proj);
925                 m->projs[pn] = proj;
926                 switch (pn) {
927                 case pn_Load_res:
928                         m->value.value = proj;
929                         m->value.mode  = get_irn_mode(proj);
930                         break;
931                 case pn_Load_X_except:
932                         m->flags |= FLAG_EXCEPTION;
933                         break;
934                 case pn_Load_M:
935                         m->mem = proj;
936                         break;
937                 case pn_Load_X_regular:
938                         break;
939                 default:
940                         panic("Unsupported Proj from Load %+F", proj);
941                 }
942         }
943
944         /* check if we can determine the entity that will be loaded */
945         ent = find_constant_entity(ptr);
946
947         if (ent != NULL                                     &&
948                 allocation_static == get_entity_allocation(ent) &&
949                 visibility_external_allocated != get_entity_visibility(ent)) {
950                 /* a static allocation that is not external: there should be NO exception
951                  * when loading even if we cannot replace the load itself. */
952                 ir_node *value = NULL;
953
954                 /* no exception: clear the m fields as they might be checked again later */
955                 if (m->projs[pn_Load_X_except]) {
956                         exchange(m->projs[pn_Load_X_except], new_Bad());
957                         m->projs[pn_Load_X_except] = NULL;
958                         m->flags &= ~FLAG_EXCEPTION;
959                         env.changed = 1;
960                 }
961                 if (m->projs[pn_Load_X_regular]) {
962                         exchange(m->projs[pn_Load_X_regular], new_r_Jmp(current_ir_graph, get_nodes_block(load)));
963                         m->projs[pn_Load_X_regular] = NULL;
964                         env.changed = 1;
965                 }
966
967                 if (variability_constant == get_entity_variability(ent)) {
968                         if (is_atomic_entity(ent)) {
969                                 /* Might not be atomic after lowering of Sels.  In this case we
970                                  * could also load, but it's more complicated. */
971                                 /* simpler case: we load the content of a constant value:
972                                  * replace it by the constant itself */
973                                 value = get_atomic_ent_value(ent);
974                         } else if (ent->has_initializer) {
975                                 /* new style initializer */
976                                 value = find_compound_ent_value(ptr);
977                         } else {
978                                 /* old style initializer */
979                                 compound_graph_path *path = get_accessed_path(ptr);
980
981                                 if (path != NULL) {
982                                         assert(is_proper_compound_graph_path(path, get_compound_graph_path_length(path)-1));
983
984                                         value = get_compound_ent_value_by_path(ent, path);
985                                         DB((dbg, LEVEL_1, "  Constant access at %F%F resulted in %+F\n", ent, path, value));
986                                         free_compound_graph_path(path);
987                                 }
988                         }
989                         if (value != NULL)
990                                 value = can_replace_load_by_const(load, value);
991                 }
992
993                 if (value != NULL) {
994                         /* we completely replace the load by this value */
995                         DB((dbg, LEVEL_1, "Replacing Load %+F by constant %+F\n", m->node, value));
996                         mark_replace_load(m, value);
997                         return;
998                 }
999         }
1000
1001         if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
1002                 /* only create an address if this node is NOT killed immediately or ignored */
1003                 m->value.id = register_address(ptr);
1004                 ++env.n_mem_ops;
1005         } else {
1006                 /* no user or ignored, KILL it */
1007                 m->flags |= FLAG_KILLED_NODE;
1008         }
1009 }
1010
1011 /**
1012  * Update a memop for a Store.
1013  *
1014  * @param m  the memop
1015  */
1016 static void update_Store_memop(memop_t *m) {
1017         int     i;
1018         ir_node *store = m->node;
1019         ir_node *adr   = get_Store_ptr(store);
1020
1021         if (get_Store_volatility(store) == volatility_is_volatile) {
1022                 m->flags |= FLAG_IGNORE;
1023         } else {
1024                 /* only create an address if this node is NOT ignored */
1025                 m->value.id = register_address(adr);
1026                 ++env.n_mem_ops;
1027         }
1028
1029         m->value.address = adr;
1030
1031         for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1032                 ir_node *proj = get_irn_out(store, i);
1033                 long    pn;
1034
1035                 /* beware of keep edges */
1036                 if (is_End(proj))
1037                         continue;
1038
1039                 pn = get_Proj_proj(proj);
1040                 m->projs[pn] = proj;
1041                 switch (pn) {
1042                 case pn_Store_X_except:
1043                         m->flags |= FLAG_EXCEPTION;
1044                         break;
1045                 case pn_Store_M:
1046                         m->mem = proj;
1047                         break;
1048                 case pn_Store_X_regular:
1049                         break;
1050                 default:
1051                         panic("Unsupported Proj from Store %+F", proj);
1052                 }
1053         }
1054         m->value.value = get_Store_value(store);
1055         m->value.mode  = get_irn_mode(m->value.value);
1056 }
1057
1058 /**
1059  * Update a memop for a Call.
1060  *
1061  * @param m  the memop
1062  */
1063 static void update_Call_memop(memop_t *m) {
1064         ir_node  *call = m->node;
1065         unsigned prop  = get_Call_memory_properties(call);
1066         int      i;
1067
1068         if (prop & mtp_property_const) {
1069                 /* A constant call does NOT use memory at all, so we
1070                    can kick it from the list. */
1071         } else if (prop & mtp_property_pure) {
1072                 /* pure calls READ memory */
1073                 m->flags = FLAG_KILL_STORES;
1074         } else
1075                 m->flags = FLAG_KILL_ALL;
1076
1077         for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
1078                 ir_node *proj = get_irn_out(call, i);
1079
1080                 /* beware of keep edges */
1081                 if (is_End(proj))
1082                         continue;
1083
1084                 switch (get_Proj_proj(proj)) {
1085                 case pn_Call_X_except:
1086                         m->flags |= FLAG_EXCEPTION;
1087                         break;
1088                 case pn_Call_M_regular:
1089                         m->mem = proj;
1090                         break;
1091                 }
1092         }
1093 }
1094
1095 /**
1096  * Update a memop for a Div/Mod/Quot/DivMod.
1097  *
1098  * @param m  the memop
1099  */
1100 static void update_DivOp_memop(memop_t *m) {
1101         ir_node *div = m->node;
1102         int     i;
1103
1104         for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
1105                 ir_node *proj = get_irn_out(div, i);
1106
1107                 /* beware of keep edges */
1108                 if (is_End(proj))
1109                         continue;
1110
1111                 switch (get_Proj_proj(proj)) {
1112                 case pn_Generic_X_except:
1113                         m->flags |= FLAG_EXCEPTION;
1114                         break;
1115                 case pn_Generic_M_regular:
1116                         m->mem = proj;
1117                         break;
1118                 }
1119         }
1120 }
1121
1122 /**
1123  * Update a memop for a Phi.
1124  *
1125  * @param m  the memop
1126  */
1127 static void update_Phi_memop(memop_t *m) {
1128         /* the Phi is its own mem */
1129         m->mem = m->node;
1130 }
1131
1132 /**
1133  * Memory walker: collect all memory ops and build topological lists.
1134  */
1135 static void collect_memops(ir_node *irn, void *ctx) {
1136         memop_t  *op;
1137         ir_node  *block;
1138         block_t  *entry;
1139
1140         (void) ctx;
1141         if (is_Proj(irn)) {
1142                 /* we can safely ignore ProjM's except the initial memory */
1143                 if (irn != get_irg_initial_mem(current_ir_graph))
1144                         return;
1145         }
1146
1147         op    = alloc_memop(irn);
1148         block = get_nodes_block(irn);
1149         entry = get_block_entry(block);
1150
1151         if (is_Phi(irn)) {
1152                 update_Phi_memop(op);
1153                 /* Phis must always be placed first */
1154                 op->next = entry->memop_forward;
1155                 entry->memop_forward = op;
1156                 if (entry->memop_backward == NULL)
1157                         entry->memop_backward = op;
1158         } else {
1159                 switch (get_irn_opcode(irn)) {
1160                 case iro_Load:
1161                         update_Load_memop(op);
1162                         break;
1163                 case iro_Store:
1164                         update_Store_memop(op);
1165                         break;
1166                 case iro_Call:
1167                         update_Call_memop(op);
1168                         break;
1169                 case iro_Sync:
1170                 case iro_Pin:
1171                         op->mem = irn;
1172                         break;
1173                 case iro_Proj:
1174                         /* initial memory */
1175                         op->mem = irn;
1176                         break;
1177                 case iro_Return:
1178                 case iro_End:
1179                         /* we can use those to find the memory edge */
1180                         break;
1181                 case iro_Div:
1182                 case iro_DivMod:
1183                 case iro_Quot:
1184                 case iro_Mod:
1185                         update_DivOp_memop(op);
1186                         break;
1187
1188                 case iro_Builtin:
1189                         /* TODO: handle some builtins */
1190                 default:
1191                         /* unsupported operation */
1192                         op->flags = FLAG_KILL_ALL;
1193                 }
1194
1195
1196                 /* all others should be placed last */
1197                 if (entry->memop_backward == NULL) {
1198                         entry->memop_forward = entry->memop_backward = op;
1199                 } else {
1200                         entry->memop_backward->next = op;
1201                         entry->memop_backward       = op;
1202                 }
1203         }
1204 }
1205
1206 /**
1207  * Find an address in the current set.
1208  *
1209  * @param value  the value to be searched for
1210  */
1211 static memop_t *find_address(const value_t *value) {
1212         if (rbitset_is_set(env.curr_set, value->id)) {
1213                 memop_t *res = env.curr_id_2_memop[value->id];
1214
1215                 if (res->value.mode == value->mode)
1216                         return res;
1217                 /* allow hidden casts */
1218                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1219                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1220                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1221                         return res;
1222         }
1223         return NULL;
1224 }
1225
1226 /**
1227  * Find an address in the avail_out set.
1228  *
1229  * @param bl     the block
1230  * @param value  the value to be searched for
1231  */
1232 static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
1233         if (rbitset_is_set(bl->avail_out, value->id)) {
1234                 memop_t *res = bl->id_2_memop_avail[value->id];
1235
1236                 if (res->value.mode == value->mode)
1237                         return res;
1238                 /* allow hidden casts */
1239                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1240                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1241                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1242                         return res;
1243         }
1244         return NULL;
1245 }
1246
1247 /**
1248  * Kill all Loads from the current set.
1249  */
1250 static void kill_all_loads(void) {
1251         unsigned pos = 0;
1252         unsigned end = env.n_mem_ops * 2 - 1;
1253
1254         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1255                 memop_t *op = env.curr_id_2_memop[pos];
1256
1257                 if (! is_Store(op->node))
1258                         rbitset_clear(env.curr_set, pos);
1259         }
1260 }
1261
1262 /**
1263  * Kill all Stores from the current set.
1264  */
1265 static void kill_all_stores(void) {
1266         unsigned pos = 0;
1267         unsigned end = env.n_mem_ops * 2 - 1;
1268
1269         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1270                 memop_t *op = env.curr_id_2_memop[pos];
1271
1272                 if (is_Store(op->node))
1273                         rbitset_clear(env.curr_set, pos);
1274         }
1275 }
1276
1277 /**
1278  * Kill all addresses from the current set.
1279  */
1280 static void kill_all(void) {
1281         rbitset_clear_all(env.curr_set, env.rbs_size);
1282
1283         /* set sentinel */
1284         rbitset_set(env.curr_set, env.rbs_size - 1);
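        /* the always-set sentinel bit ensures that the rbitset_next() scans
         * used throughout this file always hit a set bit and thus terminate
         * at the end marker */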
1285 }
1286
1287
1288 /**
1289  * Kill all Stores from the current set that are not alias-free with respect to a Load value.
1290  *
1291  * @param value  the Load value
1292  */
1293 static void kill_stores(const value_t *value) {
1294         unsigned pos = 0;
1295         unsigned end = env.n_mem_ops * 2 - 1;
1296
1297         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1298                 memop_t *op = env.curr_id_2_memop[pos];
1299
1300                 if (is_Store(op->node)) {
1301                         if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1302                                                               op->value.address, op->value.mode)) {
1303                                 rbitset_clear(env.curr_set, pos);
1304                                 env.curr_id_2_memop[pos] = NULL;
1305                         }
1306                 }
1307         }
1308 }
1309
1310 /**
1311  * Kill all memops from the current set that are not alias-free with respect to a Store value.
1312  *
1313  * @param value  the Store value
1314  */
1315 static void kill_memops(const value_t *value) {
1316         unsigned pos = 0;
1317         unsigned end = env.n_mem_ops * 2 - 1;
1318
1319         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1320                 memop_t *op = env.curr_id_2_memop[pos];
1321
1322                 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1323                                                           op->value.address, op->value.mode)) {
1324                         rbitset_clear(env.curr_set, pos);
1325                         env.curr_id_2_memop[pos] = NULL;
1326                 }
1327         }
1328 }
1329
1330 /**
1331  * Add the value of a memop to the current set.
1332  *
1333  * @param op  the memory op
1334  */
1335 static void add_memop(memop_t *op) {
1336         rbitset_set(env.curr_set, op->value.id);
1337         env.curr_id_2_memop[op->value.id] = op;
1338 }
1339
1340 /**
1341  * Add the value of a memop to the avail_out set.
1342  *
1343  * @param bl  the block
1344  * @param op  the memory op
1345  */
1346 static void add_memop_avail(block_t *bl, memop_t *op) {
1347         rbitset_set(bl->avail_out, op->value.id);
1348         bl->id_2_memop_avail[op->value.id] = op;
1349 }
1350
1351 /**
1352  * Update the value of a memop in the avail_out set.
1353  *
1354  * @param bl  the block
1355  * @param op  the memory op
1356  */
1357 static void update_memop_avail(block_t *bl, memop_t *op) {
1358         if (rbitset_is_set(bl->avail_out, op->value.id))
1359                 bl->id_2_memop_avail[op->value.id] = op;
1360 }
1361
1362 /**
1363  * Check if we can convert a value of one mode to another mode
1364  * without changing the representation of bits.
1365  */
1366 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
1367         if (get_mode_arithmetic(from) == irma_twos_complement &&
1368             get_mode_arithmetic(to) == irma_twos_complement &&
1369             get_mode_size_bits(from) == get_mode_size_bits(to))
1370                 return 1;
1371         return 0;
1372 }
1373
1374 /**
1375  * Add a Conv if needed.
1376  */
1377 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
1378         ir_mode *other = get_irn_mode(irn);
1379         if (other != mode) {
1380                 /* different modes: check if conversion is possible without changing the bits */
1381                 if (can_convert_to(other, mode)) {
1382                         ir_node *block = get_nodes_block(irn);
1383                         return new_r_Conv(current_ir_graph, block, irn, mode);
1384                 }
1385                 /* otherwise not possible ... yet */
1386                 return NULL;
1387         }
1388         return irn;
1389 }
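
/* Illustrative sketch (an assumption, not taken from the original source): a
 * stored 32-bit unsigned value can feed a signed 32-bit Load via a Conv, since
 * both modes are two's complement of equal size:
 *
 *     ir_node *def = conv_to(stored_value, mode_Is);
 *     if (def != NULL)
 *         mark_replace_load(load_op, def);   /+ as done in calc_gen_kill_avail() +/
 */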
1390
1391 /**
1392  * Do forward dataflow analysis on the given block and calculate the
1393  * GEN and KILL in the current (avail) set.
1394  *
1395  * @param bl  the block
1396  */
1397 static void calc_gen_kill_avail(block_t *bl) {
1398         memop_t *op;
1399         ir_node *def;
1400
1401         for (op = bl->memop_forward; op != NULL; op = op->next) {
1402                 switch (get_irn_opcode(op->node)) {
1403                 case iro_Phi:
1404                         /* meet */
1405                         break;
1406                 case iro_Sync:
1407                         /* join */
1408                         break;
1409                 case iro_Load:
1410                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1411                                 /* do we have this already? */
1412                                 memop_t *other = find_address(&op->value);
1413                                 if (other != NULL && other != op) {
1414                                         def = conv_to(other->value.value, op->value.mode);
1415                                         if (def != NULL) {
1416 #ifdef DEBUG_libfirm
1417                                                 if (is_Store(other->node)) {
1418                                                         /* RAW */
1419                                                         DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
1420                                                 } else {
1421                                                         /* RAR */
1422                                                         DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
1423                                                 }
1424 #endif
1425                                                 mark_replace_load(op, def);
1426                                         } else {
1427                                                 /* overwrite it */
1428                                                 add_memop(op);
1429                                         }
1430                                 } else {
1431                                         /* add this value */
1432                                         kill_stores(&op->value);
1433                                         add_memop(op);
1434                                 }
1435                         }
1436                         break;
1437                 case iro_Store:
1438                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1439                                 /* do we have this store already */
1440                                 memop_t *other = find_address(&op->value);
1441                                 if (other != NULL) {
1442                                         if (is_Store(other->node)) {
1443                                                 if (op != other && get_nodes_block(other->node) == get_nodes_block(op->node)) {
1444                                                         /*
1445                                                          * A WAW in the same block: we can kick the first Store.
1446                                                          * This is a shortcut: we know that the second Store will be anticipated
1447                                                          * then in any case.
1448                                                          */
1449                                                         DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", op->node, other->node));
1450                                                         mark_remove_store(other);
1451                                                         /* FIXME: a Load might get freed due to this killed Store */
1452                                                 }
1453                                         } else if (other->value.value == op->value.value) {
1454                                                 /* WAR */
1455                                                 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1456                                                 mark_remove_store(op);
1457                                         } else {
1458                                                 /* we overwrite the value that was loaded */
1459                                                 add_memop(op);
1460                                         }
1461                                 } else {
1462                                         /* add this value */
1463                                         kill_memops(&op->value);
1464                                         add_memop(op);
1465                                 }
1466                         }
1467                         break;
1468                 default:
1469                         switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
1470                         case FLAG_KILL_LOADS|FLAG_KILL_STORES:
1471                                 kill_all();
1472                                 break;
1473                         case FLAG_KILL_LOADS:
1474                                 kill_all_loads();
1475                                 break;
1476                         case FLAG_KILL_STORES:
1477                                 kill_all_stores();
1478                                 break;
1479                         case 0:
1480                                 break;
1481                         }
1482                 }
1483         }
1484 }
1485
1486 #define BYTE_SIZE(x)  (((x) + 7) >> 3)
1487
1488 /**
1489  * Do forward dataflow analysis on a given block to calculate the avail_out set
1490  * for this block only.
1491  *
1492  * @param bl  the block
1493  */
1494 static void forward_avail(block_t *bl) {
1495         /* fill the data from the current block */
1496         env.curr_id_2_memop = bl->id_2_memop_avail;
1497         env.curr_set        = bl->avail_out;
1498
1499         calc_gen_kill_avail(bl);
1500         dump_curr(bl, "Avail_out");
1501 }
1502
1503 /**
1504  * Do backward dataflow analysis on a given block to calculate the antic set
1505  * of Loaded addresses.
1506  *
1507  * @param bl  the block
1508  *
1509  * @return non-zero if the set has changed since last iteration
1510  */
1511 static int backward_antic(block_t *bl) {
1512         memop_t *op;
1513         int     n = get_Block_n_cfg_outs(bl->block);
1514
1515         if (n >= 1) {
1516                 ir_node *succ    = get_Block_cfg_out(bl->block, 0);
1517                 block_t *succ_bl = get_block_entry(succ);
1518                 int i;
1519
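                     /* start with the anticL_in set of the first successor */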
1520                 rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1521                 memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1522
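                     /* intersect with the anticL_in sets of all remaining successors */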
1523                 for (i = n - 1; i > 0; --i) {
1524                         ir_node *succ    = get_Block_cfg_out(bl->block, i);
1525                         block_t *succ_bl = get_block_entry(succ);
1526
1527                         rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1528                 }
1529         } else {
1530                 /* block ends with a noreturn call */
1531                 kill_all();
1532         }
1533
1534 #if 0
1535         /* cleanup: kill those Loads whose address is not available */
1536         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1537                 memop_t *op     = succ_bl->id_2_memop[pos];
1538                 ir_node *ptr    = get_Load_ptr(op->node);
1539                 ir_node *ptr_bl = get_nodes_block(ptr);
1540
1541                 if (!block_dominates(ptr_bl, bl->block))
1542                         rbitset_clear(env.curr_set, pos);
1543         }
1544 #endif
1545
1546         dump_curr(bl, "AnticL_out");
1547
1548         for (op = bl->memop_backward; op != NULL; op = op->prev) {
1549                 switch (get_irn_opcode(op->node)) {
1550                 case iro_Phi:
1551                         /* meet */
1552                         break;
1553                 case iro_Sync:
1554                         /* join */
1555                         break;
1556                 case iro_Load:
1557                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1558                                 /* always add it */
1559                                 add_memop(op);
1560                         }
1561                         break;
1562                 case iro_Store:
1563                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1564                                 /* a Store: check which memops must be killed */
1565                                 kill_memops(&op->value);
1566                         }
1567                         break;
1568                 default:
1569                         switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
1570                         case FLAG_KILL_LOADS|FLAG_KILL_STORES:
1571                                 kill_all();
1572                                 break;
1573                         case FLAG_KILL_LOADS:
1574                                 kill_all_loads();
1575                                 break;
1576                         case FLAG_KILL_STORES:
1577                                 /*kill_all_stores();*/
1578                                 break;
1579                         case 0:
1580                                 break;
1581                         }
1582                 }
1583         }
1584
1585         memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1586         if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1587                 /* changed */
1588                 rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
1589                 dump_curr(bl, "AnticL_in*");
1590                 return 1;
1591         }
1592         dump_curr(bl, "AnticL_in");
1593         return 0;
1594 }
1595
1596 /**
1597  * Replace a Load memop by an already known value.
1598  *
1599  * @param op  the Load memop
1600  */
1601 static void replace_load(memop_t *op) {
1602         ir_node *load = op->node;
1603         ir_node *def  = skip_Id(op->replace);
1604         ir_node *proj;
1605         ir_mode *mode;
1606
1607         if (def != NULL)
1608                 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1609         else {
1610                 if (op->flags & FLAG_EXCEPTION) {
1611                         /* bad: this node is unused and executed for exception only */
1612                         DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1613                         return;
1614                 }
1615                 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1616         }
1617
1618         if (op->mem != NULL) {
1619                 /* in rare cases a Load might have NO memory */
1620                 exchange(op->mem, get_Load_mem(load));
1621         }
1622         proj = op->projs[pn_Load_res];
1623         if (proj != NULL) {
1624                 mode = get_irn_mode(proj);
1625                 if (get_irn_mode(def) != mode) {
1626                         /* a hidden cast */
1627                         dbg_info *db    = get_irn_dbg_info(load);
1628                         ir_node  *block = get_nodes_block(proj);
1629                         def = new_rd_Conv(db, current_ir_graph, block, def, mode);
1630                 }
1631                 exchange(proj, def);
1632         }
1633         proj = op->projs[pn_Load_X_except];
1634         if (proj != NULL) {
1635                 exchange(proj, new_Bad());
1636         }
1637         proj = op->projs[pn_Load_X_regular];
1638         if (proj != NULL) {
1639                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1640         }
1641 }
1642
1643 /**
1644  * Remove a Store memop.
1645  *
1646  * @param op  the Store memop
1647  */
1648 static void remove_store(memop_t *op) {
1649         ir_node *store = op->node;
1650         ir_node *proj;
1651
1652         DB((dbg, LEVEL_1, "Removing %+F\n", store));
1653
1654         if (op->mem != NULL) {
1655                 /* in rare cases a Store might have no memory */
1656                 exchange(op->mem, get_Store_mem(store));
1657         }
1658         proj = op->projs[pn_Store_X_except];
1659         if (proj != NULL) {
1660                 exchange(proj, new_Bad());
1661         }
1662         proj = op->projs[pn_Store_X_regular];
1663         if (proj != NULL) {
1664                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
1665         }
1666 }
1667
1668
1669 /**
1670  * Do all necessary replacements for a given block.
1671  *
1672  * @param bl  the block
1673  */
1674 static void do_replacements(block_t *bl) {
1675         memop_t *op;
1676
1677         for (op = bl->memop_forward; op != NULL; op = op->next) {
1678                 if (op->flags & FLAG_KILLED_NODE) {
1679                         switch (get_irn_opcode(op->node)) {
1680                         case iro_Load:
1681                                 replace_load(op);
1682                                 break;
1683                         case iro_Store:
1684                                 remove_store(op);
1685                                 break;
1686                         }
1687                 }
1688         }
1689 }
1690
1691 /**
1692  * Calculate the Avail_out sets for all basic blocks.
1693  */
1694 static void calcAvail(void) {
1695         memop_t  **tmp_memop = env.curr_id_2_memop;
1696         unsigned *tmp_set    = env.curr_set;
1697         block_t  *bl;
1698
1699         /* calculate avail_out */
1700         DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1701
1702         /* iterate over all blocks in any order, skip the start block */
1703         for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1704                 forward_avail(bl);
1705         }
1706
1707         /* restore the current sets */
1708         env.curr_id_2_memop = tmp_memop;
1709         env.curr_set        = tmp_set;
1710 }
1711
1712 /**
1713  * Calculate the Antic_in sets for all basic blocks.
1714  */
1715 static void calcAntic(void) {
1716         int i, need_iter;
1717
1718         /* calculate antic_in */
1719         DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1720         i = 0;
1721         do {
1722                 block_t *bl;
1723
1724                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1725
1726                 need_iter = 0;
1727
1728                 /* over all blocks in reverse post order */
1729                 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1730                         need_iter |= backward_antic(bl);
1731                 }
1732                 ++i;
1733         } while (need_iter);
1734         DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1735 }
1736
1737 /**
1738  * Return the node representing the first memory in a block.
1739  *
1740  * @param bl  the block
1741  */
1742 static ir_node *find_first_memory(block_t *bl) {
1743         for (;;) {
1744                 if (bl->memop_forward != NULL) {
1745                         return bl->memop_forward->node;
1746                 }
1747                 /* if there is NO memory in this block, go to the post dominator */
1748                 bl = get_block_entry(get_Block_ipostdom(bl->block));
1749         }
1750 }
1751
1752 /**
1753  * Return the node representing the last memory in a block.
1754  *
1755  * @param bl  the block
1756  */
1757 static ir_node *find_last_memory(block_t *bl) {
1758         for (;;) {
1759                 if (bl->memop_backward != NULL) {
1760                         return bl->memop_backward->mem;
1761                 }
1762                 /* if there is NO memory in this block, go to the dominator */
1763                 bl = get_block_entry(get_Block_idom(bl->block));
1764         }
1765 }
1766
1767 /**
1768  * Reroute all memory users of old memory
1769  * to a new memory IR-node.
1770  *
1771  * @param omem  the old memory IR-node
1772  * @param nmem  the new memory IR-node
1773  */
1774 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1775         int i;
1776
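             /* let every user of the old memory use the new memory instead */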
1777         for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1778                 int     n_pos;
1779                 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1780
1781                 set_irn_n(user, n_pos, nmem);
1782         }
1783
1784         /* all edges that previously pointed to omem now point to nmem */
1785         nmem->out = omem->out;
1786 }
1787
1788 /**
1789  * Reroute memory users of old memory that are dominated by a given block
1790  * to a new memory IR-node.
1791  *
1792  * @param omem     the old memory IR-node
1793  * @param nmem     the new memory IR-node
1794  * @param pass_bl  the block the memory must pass
1795  */
1796 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1797         int             i, j, n = get_irn_n_outs(omem);
1798         ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1799
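             /* reroute all users dominated by pass_bl and record them; edges[0] is reserved for the count */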
1800         for (i = j = 0; i < n; ++i) {
1801                 int     n_pos;
1802                 ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
1803                 ir_node *use_bl = get_nodes_block(user);
1804
1805
1806                 if (is_Phi(user)) {
1807                         use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1808                 }
1809                 if (block_dominates(pass_bl, use_bl)) {
1810                         /* found a user that is dominated */
1811                         ++j;
1812                         edges[j].pos = n_pos;
1813                         edges[j].use = user;
1814
1815                         set_irn_n(user, n_pos, nmem);
1816                 }
1817         }
1818
1819         /* Modify the out structure: we create a new out edge array on our
1820            temporary obstack here. This should be no problem, as we invalidate the edges
1821            at the end anyway. */
1822         /* first entry is used for the length */
1823         edges[0].pos = j;
1824         nmem->out = edges;
1825 }
1826
1827 /**
1828  * insert Loads, making partly redundant Loads fully redundant
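      *
      * For every Load anticipated in this block whose value is available on
      * some but not all predecessors, a copy of the Load is created in the
      * predecessors where it is missing and the results are merged by a Phi.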
1829  */
1830 static int insert_Load(block_t *bl) {
1831         ir_node  *block = bl->block;
1832         int      i, n = get_Block_n_cfgpreds(block);
1833         unsigned pos = 0;
1834         unsigned end = env.n_mem_ops * 2 - 1;
1835         int      res = 0;
1836         ir_node  *pred    = get_Block_cfgpred_block(bl->block, 0);
1837         block_t  *pred_bl = get_block_entry(pred);
1838
1839         DB((dbg, LEVEL_3, "processing %+F\n", block));
1840
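             /* start with the avail_out set of the first predecessor */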
1841         rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
1842
1843         if (n > 1) {
1844                 int     i, pos;
1845                 ir_node **ins;
1846
1847                 NEW_ARR_A(ir_node *, ins, n);
1848
1849                 /* more than one predecessor: calculate the join over all avail_out sets */
1850                 for (i = n - 1; i > 0; --i) {
1851                         ir_node *pred    = skip_Proj(get_Block_cfgpred(block, i));
1852                         block_t *pred_bl = get_block_entry(get_nodes_block(pred));
1853
1854                         rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
1855
1856                         if (is_Load(pred) || is_Store(pred)) {
1857                                 /* We reached this block by an exception from a Load or Store:
1858                                  * the memop creating the exception was NOT completed then, so kill it
1859                                  */
1860                                 memop_t *exc_op = get_irn_memop(pred);
1861                                 rbitset_clear(env.curr_set, exc_op->value.id);
1862                         }
1863
1864                 }
1865                 /*
1866                  * Ensure that all values are in the map: build Phis if necessary.
1867                  * Note: the last bit is the sentinel and ALWAYS set, so start at rbs_size - 2.
1868                  */
1869                 for (pos = env.rbs_size - 2; pos >= 0; --pos) {
1870                         if (! rbitset_is_set(env.curr_set, pos))
1871                                 env.curr_id_2_memop[pos] = NULL;
1872                         else {
1873                                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1874                                 block_t *pred_bl = get_block_entry(pred);
1875                                 int     need_phi = 0;
1876                                 ir_mode *mode;
1877                                 memop_t *first;
1878
1879                                 first  = pred_bl->id_2_memop_avail[pos];
1880                                 ins[0] = first->value.value;
1881                                 mode = get_irn_mode(ins[0]);
1882
1883                                 for (i = 1; i < n; ++i) {
1884                                         pred    = get_Block_cfgpred_block(bl->block, i);
1885                                         pred_bl = get_block_entry(pred);
1886
1887                                         ins[i] = conv_to(pred_bl->id_2_memop_avail[pos]->value.value, mode);
1888                                         if (ins[i] != ins[0]) {
1889                                                 need_phi = 1;
1890                                                 if (ins[i] == NULL) {
1891                                                         /* conversion failed */
1892                                                         need_phi = 2;
1893                                                         break;
1894                                                 }
1895                                         }
1896
1897                                 }
1898                                 switch (need_phi) {
1899                                 case 0:
1900                                         /* no Phi needed */
1901                                         env.curr_id_2_memop[pos] = first;
1902                                         break;
1903                                 case 1: {
1904                                         /* build a Phi  */
1905                                         ir_node *phi = new_r_Phi(current_ir_graph, bl->block, n, ins, mode);
1906                                         memop_t *phiop = alloc_memop(phi);
1907
1908                                         phiop->value = first->value;
1909                                         phiop->value.value = phi;
1910
1911                                         /* no need to link it in, as it is a DATA phi */
1912
1913                                         env.curr_id_2_memop[pos] = phiop;
1914
1915                                         DB((dbg, LEVEL_3, "Created new %+F on merging value for address %+F\n", phi, first->value.address));
1916                                         break;
1917                                 }
1918                                 default:
1919                                         /* not possible because of different modes, delete the entry */
1920                                         rbitset_clear(env.curr_set, pos);
1921                                         break;
1922                                 }
1923                         }
1924                 }
1925         } else {
1926                 /* only one predecessor, simply copy the map */
1927                 memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
1928         }
1929
1930
1931         /* recalculate avail by gen and kill */
1932         calc_gen_kill_avail(bl);
1933
1934         if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
1935                 /* the avail set has changed */
1936                 rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
1937                 memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1938                 dump_curr(bl, "Avail_out*");
1939                 res = 1;
1940         }
1941
1942         if (n <= 1)
1943                 return res;
1944
1945         /* check for partly redundant values */
1946         for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
1947                 memop_t *op = bl->id_2_memop_antic[pos];
1948                 int     have_some, all_same;
1949                 ir_node *first;
1950
1951                 assert(is_Load(op->node));
1952
1953                 if (op->flags & FLAG_KILLED_NODE)
1954                         continue;
1955                 DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
1956
1957                 have_some  = 0;
1958                 all_same   = 1;
1959                 first      = NULL;
1960                 for (i = n - 1; i >= 0; --i) {
1961                         ir_node *pred    = get_Block_cfgpred_block(block, i);
1962                         block_t *pred_bl = get_block_entry(pred);
1963                         memop_t *e       = find_address_avail(pred_bl, &op->value);
1964                         ir_mode *mode    = op->value.mode;
1965
1966                         if (e == NULL) {
1967                                 ir_node *block = get_nodes_block(op->value.address);
1968                                 if (! block_dominates(block, pred)) {
1969                                         /* cannot place a copy here */
1970                                         have_some = 0;
1971                                         break;
1972                                 }
1973                                 DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
1974                                 pred_bl->avail = NULL;
1975                                 all_same       = 0;
1976                         } else {
1977                                 if (e->value.mode != mode && !can_convert_to(e->value.mode, mode)) {
1978                                         /* cannot create a Phi due to different modes */
1979                                         have_some = 0;
1980                                         break;
1981                                 }
1982                                 pred_bl->avail = e;
1983                                 have_some      = 1;
1984                                 DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
1985                                 if (first == NULL)
1986                                         first = e->node;
1987                                 else if (first != e->node)
1988                                         all_same = 0;
1989                         }
1990                 }
1991                 if (have_some && !all_same) {
1992                         ir_mode *mode = op->value.mode;
1993                         ir_node **in, *phi;
1994
1995                         NEW_ARR_A(ir_node *, in, n);
1996
1997                         for (i = n - 1; i >= 0; --i) {
1998                                 ir_node *pred    = get_Block_cfgpred_block(block, i);
1999                                 block_t *pred_bl = get_block_entry(pred);
2000
2001                                 if (pred_bl->avail == NULL) {
2002                                         /* create a new Load here to make it fully redundant */
2003                                         dbg_info *db       = get_irn_dbg_info(op->node);
2004                                         ir_node  *last_mem = find_last_memory(pred_bl);
2005                                         ir_node  *load, *def;
2006                                         memop_t  *new_op;
2007
2008                                         assert(last_mem != NULL);
2009                                         load = new_rd_Load(db, current_ir_graph, pred, last_mem, op->value.address, mode, cons_none);
2010                                         def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
2011                                         DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));
2012
2013                                         new_op                = alloc_memop(load);
2014                                         new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
2015                                         new_op->value.address = op->value.address;
2016                                         new_op->value.id      = op->value.id;
2017                                         new_op->value.mode    = mode;
2018                                         new_op->value.value   = def;
2019
2020                                         new_op->projs[pn_Load_M]   = new_op->mem;
2021                                         new_op->projs[pn_Load_res] = def;
2022
2023                                         new_op->prev            = pred_bl->memop_backward;
2024                                         pred_bl->memop_backward = new_op;
2025
2026                                         if (pred_bl->memop_forward == NULL)
2027                                                 pred_bl->memop_forward = new_op;
2028
2029                                         if (get_nodes_block(last_mem) == pred) {
2030                                                 /* We have added a new last memory op in the pred block.
2031                                                    If pred already had a last mem, reroute all memory
2032                                                    users. */
2033                                                 reroute_all_mem_users(last_mem, new_op->mem);
2034                                         } else {
2035                                                 /* reroute only the memory going through the pred block */
2036                                                 reroute_mem_through(last_mem, new_op->mem, pred);
2037                                         }
2038
2039                                         /* we added this load at the end, so it will be avail anyway */
2040                                         add_memop_avail(pred_bl, new_op);
2041                                         pred_bl->avail = new_op;
2042                                 }
2043                                 in[i] = conv_to(pred_bl->avail->value.value, mode);
2044                         }
2045                         phi = new_r_Phi(current_ir_graph, block, n, in, mode);
2046                         DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));
2047
2048                         if (get_nodes_block(op->node) == block) {
2049                                 /* The redundant node was in the current block:
2050                                    In that case, DO NOT update avail_out. If it was NOT
2051                                    avail although it is executed in this block, it is killed by a later
2052                                    instruction.
2053                                  */
2054                                 memop_t *phi_op = clone_memop_phi(op, phi);
2055
2056                                 update_memop_avail(bl, phi_op);
2057
2058                                 mark_replace_load(op, phi);
2059                         } else {
2060                                 /* The redundant node is NOT in the current block but is anticipated here. */
2061                                 memop_t *phi_op = clone_memop_phi(op, phi);
2062
2063                                 add_memop_avail(bl, phi_op);
2064
2065                                 /* propagate it downwards */
2066                                 res = 1;
2067                         }
2068                         /* clear it so we do not find it in the next iteration */
2069                         rbitset_clear(bl->anticL_in, pos);
2070                 }
2071         }
2072         return res;
2073 }
2074
2075 /**
2076  * Insert Loads upwards.
2077  */
2078 static void insert_Loads_upwards(void) {
2079         int i, need_iter;
2080         block_t *bl;
2081
2082         /* recalculate antic_out and insert Loads */
2083         DB((dbg, LEVEL_2, "Inserting Loads\n"));
2084
2085         i = 0;
2086         do {
2087                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
2088
2089                 need_iter = 0;
2090
2091                 /* over all blocks in reverse post order, skip the start block */
2092                 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
2093                         need_iter |= insert_Load(bl);
2094                 }
2095                 ++i;
2096         } while (need_iter);
2097
2098         DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
2099 }
2100
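     /**
      * Perform the Load/Store optimization on the given graph.
      *
      * As a rough illustration only (hypothetical C source, not part of the pass),
      * a fully redundant access like
      *
      *     x = *p;            x = *p;
      *     ...          =>    ...
      *     y = *p;            y = x;
      *
      * is replaced by reusing the already loaded value, and partly redundant
      * Loads are made fully redundant by inserting copies into predecessor blocks.
      *
      * @param irg  the graph
      *
      * @return non-zero if the graph was changed
      */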
2101 int opt_ldst(ir_graph *irg) {
2102         block_t  *bl;
2103         ir_graph *rem = current_ir_graph;
2104
2105         current_ir_graph = irg;
2106
2107         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
2108 //      firm_dbg_set_mask(dbg, -1);
2109
2110         DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
2111
2112         /* we need landing pads */
2113         remove_critical_cf_edges(irg);
2114
2115         dump_ir_block_graph(irg, "-XXX");
2116
2117         if (get_opt_alias_analysis()) {
2118                 assure_irg_entity_usage_computed(irg);
2119                 assure_irp_globals_entity_usage_computed();
2120         }
2121
2122         obstack_init(&env.obst);
2123         ir_nodemap_init(&env.adr_map);
2124
2125         env.forward     = NULL;
2126         env.backward    = NULL;
2127         env.curr_adr_id = 0;
2128         env.n_mem_ops   = 0;
2129         env.changed     = 0;
2130         env.start_bl    = get_irg_start_block(irg);
2131         env.end_bl      = get_irg_end_block(irg);
2132
2133         assure_doms(irg);
2134         assure_irg_outs(irg);
2135
2136         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
2137
2138         /* first step: allocate block entries: produces an
2139            inverse post-order list for the CFG */
2140         set_irn_link(env.end_bl, NULL);
2141         irg_out_block_walk(get_irg_start_block(irg), NULL, prepare_blocks, NULL);
2142
2143         if (get_block_entry(env.end_bl) == NULL) {
2144                 /*
2145                  * The end block is NOT reachable due to endless loops
2146                  * or no_return calls. Ensure that it is initialized.
2147                  * Note that this places the entry for the end block first, so we must fix this.
2148                  * env.backward points to the last block for this purpose.
2149                  */
2150                 prepare_blocks(env.end_bl, NULL);
2151
2152                 bl               = env.forward;
2153                 env.forward      = bl->forward_next;
2154                 bl->forward_next = NULL;
2155
2156                 env.backward->forward_next = bl;
2157         }
2158
2159         /* second step: find and sort all memory ops */
2160         walk_memory_irg(irg, collect_memops, NULL, NULL);
2161
2162         if (env.n_mem_ops == 0) {
2163                 /* no memory ops */
2164                 goto end;
2165         }
2166
2167         /* create the backward links */
2168         env.backward = NULL;
2169         irg_block_walk_graph(irg, NULL, collect_backward, NULL);
2170
2171         /* link the end block in */
2172         bl = get_block_entry(env.end_bl);
2173         bl->backward_next = env.backward;
2174         env.backward      = bl;
2175
2176         /* check that we really start with the start / end block */
2177         assert(env.forward->block  == env.start_bl);
2178         assert(env.backward->block == env.end_bl);
2179
2180         /* create address sets: we know that 2 * n_mem_ops - 1 is an upper bound for all possible addresses */
2181         env.rbs_size = 2 * env.n_mem_ops;
2182
2183         /* create the current set */
2184         env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
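             /* the last bit is the sentinel: it is always set so that searches with rbitset_next() terminate */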
2185         rbitset_set(env.curr_set, env.rbs_size - 1);
2186         env.curr_id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2187         memset(env.curr_id_2_memop, 0, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2188
2189         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2190                 /* set sentinel bits */
2191                 bl->avail_out  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2192                 rbitset_set(bl->avail_out, env.rbs_size - 1);
2193
2194                 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2195                 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
2196
2197                 bl->anticL_in  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2198                 rbitset_set(bl->anticL_in, env.rbs_size - 1);
2199
2200                 bl->id_2_memop_antic = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2201                 memset(bl->id_2_memop_antic, 0, env.rbs_size * sizeof(bl->id_2_memop_antic[0]));
2202         }
2203
2204 //      dump_block_list(&env);
2205
2206         calcAvail();
2207         calcAntic();
2208
2209         insert_Loads_upwards();
2210
2211         if (env.changed) {
2212                 /* over all blocks in reverse post order */
2213                 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2214                         do_replacements(bl);
2215                 }
2216
2217                 /* Not only invalidate the outs but free them: we might have allocated
2218                    new out arrays on our obstack, which is freed at the end of this pass. */
2219                 free_irg_outs(irg);
2220                 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
2221         }
2222 end:
2223
2224         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2225         ir_nodemap_destroy(&env.adr_map);
2226         obstack_free(&env.obst, NULL);
2227
2228         dump_ir_block_graph(irg, "-YYY");
2229
2230         current_ir_graph = rem;
2231
2232         return env.changed != 0;
2233 }