cleanup: Remove pointless assert(is_${NODE}(x)) just before get_${NODE}_${FOO}(x...
[libfirm] / ir / opt / opt_ldst.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Dataflow driven Load/Store optimizations, uses some ideas from
23  *          VanDrunen's LEPRE
24  * @author  Michael Beck
25  */
26 #include "config.h"
27
28 #include "irnode_t.h"
29 #include "irflag_t.h"
30 #include "array_t.h"
31 #include "ircons.h"
32 #include "irdom.h"
33 #include "irgmod.h"
34 #include "irgwalk.h"
35 #include "irouts.h"
36 #include "irgraph.h"
37 #include "irgopt.h"
38 #include "iropt.h"
39 #include "iroptimize.h"
40 #include "irnodehashmap.h"
41 #include "raw_bitset.h"
42 #include "debug.h"
43 #include "error.h"
44 #include "irpass.h"
45
46 /* maximum number of output Proj's */
47 #define MAX_PROJ ((long)pn_Load_max > (long)pn_Store_max ? (long)pn_Load_max : (long)pn_Store_max)
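/* A memop's projs[] array is indexed directly by the Proj number of its Load
 * or Store, so it must be able to hold the larger of both Proj ranges. */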
48
49 /**
50  * Mapping an address to a dense ID.
51  */
52 typedef struct address_entry_t {
53         unsigned id;          /**< The ID */
54 } address_entry;
55
56 /**
57  * Memop-flags.
58  */
59 enum memop_flags {
60         FLAG_KILL_ALL    = 1, /**< KILL all addresses */
61         FLAG_KILLED_NODE = 2, /**< this node was killed */
62         FLAG_EXCEPTION   = 4, /**< this node has exception flow */
63         FLAG_IGNORE      = 8, /**< ignore this node (volatile or other) */
64 };
65
66 /**
67  * A value: This represents a value stored at a given address in
68  * memory. Do not confuse with values from value numbering.
69  */
70 typedef struct value_t value_t;
71 struct value_t {
72         ir_node  *address;    /**< the address of this value */
73         ir_node  *value;      /**< the value itself */
74         ir_mode  *mode;       /**< the mode of the value */
75         unsigned id;          /**< address id */
76 };
77
78 /**
79  * A memop describes a memory-related operation.
80  * These are Loads/Stores and all other ops that might modify
81  * memory (Calls, CopyB) or cause exceptions.
82  */
83 typedef struct memop_t memop_t;
84 struct memop_t {
85         value_t  value;      /**< the value of this memop: only defined for Load/Store */
86         ir_node  *node;      /**< the memory op itself */
87         ir_node  *mem;       /**< the memory FROM this node */
88         ir_node  *replace;   /**< the replacement node if this memop is replaced */
89         memop_t  *next;      /**< links to the next memory op in the block in forward order. */
90         memop_t  *prev;      /**< links to the previous memory op in the block in forward order. */
91         unsigned flags;      /**< memop flags */
92         ir_node  *projs[MAX_PROJ+1]; /**< Projs of this memory op */
93 };
94
95 /**
96  * Additional data for every basic block.
97  */
98 typedef struct block_t block_t;
99 struct block_t {
100         memop_t  *memop_forward;     /**< topologically sorted list of memory ops in this block */
101         memop_t  *memop_backward;    /**< last memop in the list */
102         unsigned *avail_out;         /**< out-set of available addresses */
103         memop_t  **id_2_memop_avail; /**< maps avail address ids to memops */
104         unsigned *anticL_in;         /**< in-set of anticipated Load addresses */
105         memop_t  **id_2_memop_antic; /**< maps anticipated address ids to memops */
106         ir_node  *block;             /**< the associated block */
107         block_t  *forward_next;      /**< next block entry for forward iteration */
108         block_t  *backward_next;     /**< next block entry for backward iteration */
109         memop_t  *avail;             /**< used locally for the avail map */
110         memop_t  **trans_results;    /**< used to cache translated nodes during the antic calculation. */
111 };
112
113 /**
114  * Metadata for this pass.
115  */
116 typedef struct ldst_env_t {
117         struct obstack   obst;             /**< obstack for temporary data */
118         ir_nodehashmap_t adr_map;          /**< Maps addresses to address ids (address_entry). */
119         block_t         *forward;          /**< Inverse post-order list of all blocks Start->End */
120         block_t         *backward;         /**< Inverse post-order list of all blocks End->Start */
121         ir_node         *start_bl;         /**< start block of the current graph */
122         ir_node         *end_bl;           /**< end block of the current graph */
123         unsigned        *curr_set;         /**< current set of addresses */
124         memop_t         **curr_id_2_memop; /**< current map of address ids to memops */
125         unsigned        curr_adr_id;       /**< number for address mapping */
126         unsigned        n_mem_ops;         /**< number of memory operations (Loads/Stores) */
127         size_t          rbs_size;          /**< size of all bitsets in bits (number of addresses plus one sentinel bit) */
128         int             max_cfg_preds;     /**< maximum number of block cfg predecessors */
129         int             changed;           /**< Flags for changed graph state */
130 #ifdef DEBUG_libfirm
131         ir_node         **id_2_address;    /**< maps an id to the used address */
132 #endif
133 } ldst_env;
134
135 /* the one and only environment */
136 static ldst_env env;
137
138 #ifdef DEBUG_libfirm
139
140 static firm_dbg_module_t *dbg;
141
142 /**
143  * Dumps the block list.
144  *
145  * @param env  the ldst environment
146  */
147 static void dump_block_list(ldst_env *env)
148 {
149         block_t *entry;
150         memop_t *op;
151         int     i;
152
153         for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
154                 DB((dbg, LEVEL_2, "%+F {", entry->block));
155
156                 i = 0;
157                 for (op = entry->memop_forward; op != NULL; op = op->next) {
158                         if (i == 0) {
159                                 DB((dbg, LEVEL_2, "\n\t"));
160                         }
161                         DB((dbg, LEVEL_2, "%+F", op->node));
162                         if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
163                                 DB((dbg, LEVEL_2, "X"));
164                         else if (op->flags & FLAG_KILL_ALL)
165                                 DB((dbg, LEVEL_2, "K"));
166                         DB((dbg, LEVEL_2, ", "));
167
168                         i = (i + 1) & 3;
169                 }
170                 DB((dbg, LEVEL_2, "\n}\n\n"));
171         }
172 }
173
174 /**
175  * Dumps the current set.
176  *
177  * @param bl   current block
178  * @param s    name of the set
179  */
180 static void dump_curr(block_t *bl, const char *s)
181 {
182         size_t end = env.rbs_size - 1;
183         size_t pos;
184         int    i;
185
186         DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
187         i = 0;
188         for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
189                 memop_t *op = env.curr_id_2_memop[pos];
190
191                 if (i == 0) {
192                         DB((dbg, LEVEL_2, "\n\t"));
193                 }
194
195                 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
196                 i = (i + 1) & 3;
197         }
198         DB((dbg, LEVEL_2, "\n}\n"));
199 }
200
201 #else
202 static void dump_block_list(ldst_env *env)
203 {
204         (void) env;
205 }
206 static void dump_curr(block_t *bl, const char *s)
207 {
208         (void) bl;
209         (void) s;
210 }
211 #endif /* DEBUG_libfirm */
212
213 /** Get the block entry for a block node */
214 static block_t *get_block_entry(const ir_node *block)
215 {
216         assert(is_Block(block));
217
218         return (block_t*)get_irn_link(block);
219 }
220
221 /** Get the memop entry for a memory operation node */
222 static memop_t *get_irn_memop(const ir_node *irn)
223 {
224         assert(! is_Block(irn));
225         return (memop_t*)get_irn_link(irn);
226 }
227
228 /**
229  * Walk over the memory edges from definition to users.
230  * This ensures that even operations without memory output are found.
231  *
232  * @param irn   start node
233  * @param pre   pre walker function
234  * @param post  post walker function
235  * @param ctx   context parameter for the walker functions
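 *
 * The walk follows mode_M results and, for mode_T nodes, only their memory
 * Projs, so exactly the nodes on the memory dependency chains are visited.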
236  */
237 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx)
238 {
239         ir_mode *mode;
240
241         mark_irn_visited(irn);
242
243         if (pre)
244                 pre(irn, ctx);
245
246         mode = get_irn_mode(irn);
247         if (mode == mode_M) {
248                 /* every successor uses memory */
249                 for (unsigned i = get_irn_n_outs(irn); i-- > 0; ) {
250                         ir_node *succ = get_irn_out(irn, i);
251
252                         if (! irn_visited(succ))
253                                 walk_memory(succ, pre, post, ctx);
254                 }
255         } else if (mode == mode_T) {
256                 /* only some Projs use memory */
257                 for (unsigned i = get_irn_n_outs(irn); i-- > 0; ) {
258                         ir_node *proj = get_irn_out(irn, i);
259
260                         if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
261                                 walk_memory(proj, pre, post, ctx);
262                 }
263         }
264         if (post)
265                 post(irn, ctx);
266 }
267
268 /**
269  * Walks over all memory nodes of a graph.
270  *
271  * @param irg   a graph
272  * @param pre   pre walker function
273  * @param post  post walker function
274  * @param ctx   context parameter for the walker functions
275  */
276 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx)
277 {
278         inc_irg_visited(irg);
279
280         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
281
282         /*
283          * there are two possible sources for memory: initial_mem and nomem
284          * we ignore nomem as this should NOT change the memory
285          */
286         walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
287
288         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
289 }
290
291 /**
292  * Register an address and allocate a dense (0..n) ID for it.
293  *
294  * @param adr  the IR-node representing the address
295  *
296  * @return the allocated id
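 *
 * Confirm and Cast nodes are skipped first, so an address wrapped in them
 * gets the same id as the bare address.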
297  */
298 static unsigned register_address(ir_node *adr)
299 {
300         address_entry *entry;
301
302         /* skip Confirms and Casts */
303 restart:
304         if (is_Confirm(adr)) {
305                 adr = get_Confirm_value(adr);
306                 goto restart;
307         }
308         if (is_Cast(adr)) {
309                 adr = get_Cast_op(adr);
310                 goto restart;
311         }
312
313         entry = ir_nodehashmap_get(address_entry, &env.adr_map, adr);
314
315         if (entry == NULL) {
316                 /* new address */
317                 entry = OALLOC(&env.obst, address_entry);
318
319                 entry->id = env.curr_adr_id++;
320                 ir_nodehashmap_insert(&env.adr_map, adr, entry);
321
322                 DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
323 #ifdef DEBUG_libfirm
324                 ARR_APP1(ir_node *, env.id_2_address, adr);
325 #endif
326         }
327         return entry->id;
328 }
329
330
331 /**
332  * translate an address through a Phi node into a given predecessor
333  * block.
334  *
335  * @param address  the address
336  * @param block    the block
337  * @param pos      the position of the predecessor in block
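 *
 * Example: for address = Phi(a0, a1) in block, pos 1 yields a1; any other
 * address is returned unchanged.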
338  */
339 static ir_node *phi_translate(ir_node *address, const ir_node *block, int pos)
340 {
341         if (is_Phi(address) && get_nodes_block(address) == block)
342                 address = get_Phi_pred(address, pos);
343         return address;
344 }
345
346 /**
347  * Walker: allocate a block entry for every block
348  * and register all potential addresses.
349  */
350 static void prepare_blocks(ir_node *irn, void *ctx)
351 {
352         (void)ctx;
353
354         if (is_Block(irn)) {
355                 block_t *entry = OALLOC(&env.obst, block_t);
356                 int     n;
357
358                 entry->memop_forward    = NULL;
359                 entry->memop_backward   = NULL;
360                 entry->avail_out        = NULL;
361                 entry->id_2_memop_avail = NULL;
362                 entry->anticL_in        = NULL;
363                 entry->id_2_memop_antic = NULL;
364                 entry->block            = irn;
365                 entry->forward_next     = NULL;
366                 entry->backward_next    = NULL;
367                 entry->avail            = NULL;
368                 entry->trans_results    = NULL;
369                 set_irn_link(irn, entry);
370
371                 set_Block_phis(irn, NULL);
372
373                 /* use block marks to track unreachable blocks */
374                 set_Block_mark(irn, 0);
375
376                 n = get_Block_n_cfgpreds(irn);
377                 if (n > env.max_cfg_preds)
378                         env.max_cfg_preds = n;
379         } else {
380                 ir_mode *mode = get_irn_mode(irn);
381
382                 if (mode_is_reference(mode)) {
383                         /*
384                          * Register ALL possible addresses: this is overkill, but
385                          * simpler than doing it only for all possible translated addresses
386                          * (which would be sufficient at the moment).
387                          */
388                         (void)register_address(irn);
389                 }
390         }
391 }
392
393 /**
394  * Post-Walker, link in all Phi's
395  */
396 static void link_phis(ir_node *irn, void *ctx)
397 {
398         (void)ctx;
399
400         if (is_Phi(irn)) {
401                 ir_node *block = get_nodes_block(irn);
402                 add_Block_phi(block, irn);
403         }
404 }
405
406 /**
407  * Block walker: creates the inverse post-order list for the CFG.
408  */
409 static void inverse_post_order(ir_node *block, void *ctx)
410 {
411         block_t *entry = get_block_entry(block);
412
413         (void)ctx;
414
415         /* mark this block IS reachable from start */
416         set_Block_mark(block, 1);
417
418         /* create the list in inverse order */
419         entry->forward_next = env.forward;
420         env.forward         = entry;
421
422         /* remember the first visited (last in list) entry, needed for later */
423         if (env.backward == NULL)
424                 env.backward = entry;
425 }
426
427 /**
428  * Block walker: create backward links for the memops of a block.
429  */
430 static void collect_backward(ir_node *block, void *ctx)
431 {
432         block_t *entry = get_block_entry(block);
433         memop_t *last, *op;
434
435         (void)ctx;
436
437         /*
438          * Do NOT link in the end block yet. We want it to be
439          * the first in the list. This is NOT guaranteed by the walker
440          * if we have endless loops.
441          */
442         if (block != env.end_bl) {
443                 entry->backward_next = env.backward;
444
445                 /* create the list in inverse order */
446                 env.backward = entry;
447         }
448
449         /* create backward links for all memory ops */
450         last = NULL;
451         for (op = entry->memop_forward; op != NULL; op = op->next) {
452                 op->prev = last;
453                 last     = op;
454         }
455         entry->memop_backward = last;
456 }
457
458 /**
459  * Allocate a memop.
460  *
461  * @param irn  the IR-node representing the memop or NULL
462  *             if this is a translated (virtual) memop
463  *
464  * @return the allocated memop
465  */
466 static memop_t *alloc_memop(ir_node *irn)
467 {
468         memop_t *m = OALLOC(&env.obst, memop_t);
469
470         m->value.address = NULL;
471         m->value.value   = NULL;
472         m->value.mode    = NULL;
473
474         m->node          = irn;
475         m->mem           = NULL;
476         m->replace       = NULL;
477         m->next          = NULL;
478         m->flags         = 0;
479
480         memset(m->projs, 0, sizeof(m->projs));
481
482         if (irn != NULL)
483                 set_irn_link(irn, m);
484         return m;
485 }
486
487 /**
488  * Create a memop for a Phi-replacement.
489  *
490  * @param op   the memop to clone
491  * @param phi  the Phi-node representing the new value
492  */
493 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi)
494 {
495         memop_t *m = OALLOC(&env.obst, memop_t);
496
497         m->value         = op->value;
498         m->value.value   = phi;
499
500         m->node          = phi;
501         m->replace       = NULL;
502         m->next          = NULL;
503         m->flags         = 0;
504
505         set_irn_link(phi, m);
506         return m;
507 }
508
509 /**
510  * Return the memory properties of a call node.
511  *
512  * @param call  the call node
513  *
514  * @return a bitset of mtp_property_const and mtp_property_pure
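 *
 * mtp_property_const means the call does not touch memory at all,
 * mtp_property_pure means the call only reads memory.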
515  */
516 static unsigned get_Call_memory_properties(ir_node *call)
517 {
518         ir_type *call_tp = get_Call_type(call);
519         unsigned prop = get_method_additional_properties(call_tp);
520
521         /* first check the call type */
522         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
523                 /* try the called entity */
524                 ir_node *ptr = get_Call_ptr(call);
525
526                 if (is_SymConst_addr_ent(ptr)) {
527                         ir_entity *ent = get_SymConst_entity(ptr);
528
529                         prop = get_entity_additional_properties(ent);
530                 }
531         }
532         return prop & (mtp_property_const|mtp_property_pure);
533 }
534
535 /**
536  * Returns an entity if the address ptr points to a constant one.
537  *
538  * @param ptr  the address
539  *
540  * @return an entity or NULL
541  */
542 static ir_entity *find_constant_entity(ir_node *ptr)
543 {
544         for (;;) {
545                 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
546                         return get_SymConst_entity(ptr);
547                 } else if (is_Sel(ptr)) {
548                         ir_entity *ent = get_Sel_entity(ptr);
549                         ir_type   *tp  = get_entity_owner(ent);
550
551                         /* Do not fiddle with polymorphism. */
552                         if (is_Class_type(get_entity_owner(ent)) &&
553                                 ((get_entity_n_overwrites(ent)    != 0) ||
554                                 (get_entity_n_overwrittenby(ent) != 0)   ) )
555                                 return NULL;
556
557                         if (is_Array_type(tp)) {
558                                 /* check bounds */
559                                 int i, n;
560
561                                 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
562                                         ir_node   *bound;
563                                         ir_tarval *tlower, *tupper;
564                                         ir_node   *index = get_Sel_index(ptr, i);
565                                         ir_tarval *tv     = computed_value(index);
566
567                                         /* check if the index is constant */
568                                         if (tv == tarval_bad)
569                                                 return NULL;
570
571                                         bound  = get_array_lower_bound(tp, i);
572                                         tlower = computed_value(bound);
573                                         bound  = get_array_upper_bound(tp, i);
574                                         tupper = computed_value(bound);
575
576                                         if (tlower == tarval_bad || tupper == tarval_bad)
577                                                 return NULL;
578
579                                         if (tarval_cmp(tv, tlower) == ir_relation_less)
580                                                 return NULL;
581                                         if (tarval_cmp(tupper, tv) == ir_relation_less)
582                                                 return NULL;
583
584                                         /* ok, bounds check finished */
585                                 }
586                         }
587
588                         if (get_entity_linkage(ent) == IR_LINKAGE_CONSTANT)
589                                 return ent;
590
591                         /* try next */
592                         ptr = get_Sel_ptr(ptr);
593                 } else if (is_Add(ptr)) {
594                         ir_node *l = get_Add_left(ptr);
595                         ir_node *r = get_Add_right(ptr);
596
597                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
598                                 ptr = l;
599                         else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
600                                 ptr = r;
601                         else
602                                 return NULL;
603
604                         /* for now, we support only one addition, reassoc should fold all others */
605                         if (! is_SymConst(ptr) && !is_Sel(ptr))
606                                 return NULL;
607                 } else if (is_Sub(ptr)) {
608                         ir_node *l = get_Sub_left(ptr);
609                         ir_node *r = get_Sub_right(ptr);
610
611                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
612                                 ptr = l;
613                         else
614                                 return NULL;
615                         /* for now, we support only one subtraction, reassoc should fold all others */
616                         if (! is_SymConst(ptr) && !is_Sel(ptr))
617                                 return NULL;
618                 } else
619                         return NULL;
620         }
621 }
622
623 /**
624  * Return the index of Sel node n at dimension dim as a long
625  */
626 static long get_Sel_array_index_long(ir_node *n, int dim)
627 {
628         ir_node *index = get_Sel_index(n, dim);
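        /* the index must be a Const node here, otherwise get_Const_tarval() fails */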
629         return get_tarval_long(get_Const_tarval(index));
630 }
631
632 typedef struct path_entry {
633         ir_entity         *ent;
634         struct path_entry *next;
635         size_t            index;
636 } path_entry;
637
638 static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next)
639 {
640         path_entry       entry, *p;
641         ir_entity        *ent, *field;
642         ir_initializer_t *initializer;
643         ir_tarval        *tv;
644         ir_type          *tp;
645         size_t           n;
646
647         entry.next = next;
648         if (is_SymConst(ptr)) {
649                 /* found the root */
650                 ent         = get_SymConst_entity(ptr);
651                 initializer = get_entity_initializer(ent);
652                 for (p = next; p != NULL;) {
653                         if (initializer->kind != IR_INITIALIZER_COMPOUND)
654                                 return NULL;
655                         n  = get_initializer_compound_n_entries(initializer);
656                         tp = get_entity_type(ent);
657
658                         if (is_Array_type(tp)) {
659                                 ent = get_array_element_entity(tp);
660                                 if (ent != p->ent) {
661                                         /* a missing [0] */
662                                         if (0 >= n)
663                                                 return NULL;
664                                         initializer = get_initializer_compound_value(initializer, 0);
665                                         continue;
666                                 }
667                         }
668                         if (p->index >= n)
669                                 return NULL;
670                         initializer = get_initializer_compound_value(initializer, p->index);
671
672                         ent = p->ent;
673                         p   = p->next;
674                 }
675                 tp = get_entity_type(ent);
676                 while (is_Array_type(tp)) {
677                         ent = get_array_element_entity(tp);
678                         tp = get_entity_type(ent);
679                         /* a missing [0] */
680                         n  = get_initializer_compound_n_entries(initializer);
681                         if (0 >= n)
682                                 return NULL;
683                         initializer = get_initializer_compound_value(initializer, 0);
684                 }
685
686                 switch (initializer->kind) {
687                 case IR_INITIALIZER_CONST:
688                         return get_initializer_const_value(initializer);
689                 case IR_INITIALIZER_TARVAL:
690                 case IR_INITIALIZER_NULL:
691                 default:
692                         return NULL;
693                 }
694         } else if (is_Sel(ptr)) {
695                 entry.ent = field = get_Sel_entity(ptr);
696                 tp = get_entity_owner(field);
697                 if (is_Array_type(tp)) {
698                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
699                         entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
700                 } else {
701                         size_t i, n_members = get_compound_n_members(tp);
702                         for (i = 0; i < n_members; ++i) {
703                                 if (get_compound_member(tp, i) == field)
704                                         break;
705                         }
706                         if (i >= n_members) {
707                                 /* not found: should NOT happen */
708                                 return NULL;
709                         }
710                         entry.index = i;
711                 }
712                 return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
713         }  else if (is_Add(ptr)) {
714                 ir_mode *mode;
715                 unsigned pos;
716
717                 {
718                         ir_node *l = get_Add_left(ptr);
719                         ir_node *r = get_Add_right(ptr);
720                         if (is_Const(r)) {
721                                 ptr = l;
722                                 tv  = get_Const_tarval(r);
723                         } else {
724                                 ptr = r;
725                                 tv  = get_Const_tarval(l);
726                         }
727                 }
728 ptr_arith:
729                 mode = get_tarval_mode(tv);
730
731                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
732                 if (is_Sel(ptr)) {
733                         field = get_Sel_entity(ptr);
734                 } else {
735                         field = get_SymConst_entity(ptr);
736                 }
737
738                 /* count needed entries */
739                 pos = 0;
740                 for (ent = field;;) {
741                         tp = get_entity_type(ent);
742                         if (! is_Array_type(tp))
743                                 break;
744                         ent = get_array_element_entity(tp);
745                         ++pos;
746                 }
747                 /* should be at least ONE entry */
748                 if (pos == 0)
749                         return NULL;
750
751                 /* allocate the right number of entries */
752                 NEW_ARR_A(path_entry, p, pos);
753
754                 /* fill them up */
755                 pos = 0;
756                 for (ent = field;;) {
757                         unsigned  size;
758                         ir_tarval *sz, *tv_index, *tlower, *tupper;
759                         size_t    index;
760                         ir_node   *bound;
761
762                         tp = get_entity_type(ent);
763                         if (! is_Array_type(tp))
764                                 break;
765                         ent = get_array_element_entity(tp);
766                         p[pos].ent  = ent;
767                         p[pos].next = &p[pos + 1];
768
769                         size = get_type_size_bytes(get_entity_type(ent));
770                         sz   = new_tarval_from_long(size, mode);
771
772                         tv_index = tarval_div(tv, sz);
773                         tv       = tarval_mod(tv, sz);
774
775                         if (tv_index == tarval_bad || tv == tarval_bad)
776                                 return NULL;
777
778                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
779                         bound  = get_array_lower_bound(tp, 0);
780                         tlower = computed_value(bound);
781                         bound  = get_array_upper_bound(tp, 0);
782                         tupper = computed_value(bound);
783
784                         if (tlower == tarval_bad || tupper == tarval_bad)
785                                 return NULL;
786
787                         if (tarval_cmp(tv_index, tlower) == ir_relation_less)
788                                 return NULL;
789                         if (tarval_cmp(tupper, tv_index) == ir_relation_less)
790                                 return NULL;
791
792                         /* ok, bounds check finished */
793                         index = get_tarval_long(tv_index);
794                         p[pos].index = index;
795                         ++pos;
796                 }
797                 if (! tarval_is_null(tv)) {
798                         /* hmm, wrong access */
799                         return NULL;
800                 }
801                 p[pos - 1].next = next;
802                 return rec_find_compound_ent_value(ptr, p);
803         } else if (is_Sub(ptr)) {
804                 ir_node *l = get_Sub_left(ptr);
805                 ir_node *r = get_Sub_right(ptr);
806
807                 ptr = l;
808                 tv  = get_Const_tarval(r);
809                 tv  = tarval_neg(tv);
810                 goto ptr_arith;
811         }
812         return NULL;
813 }
814
815 static ir_node *find_compound_ent_value(ir_node *ptr)
816 {
817         return rec_find_compound_ent_value(ptr, NULL);
818 }
819
820 /**
821  * Mark a Load memop to be replaced by a definition
822  *
823  * @param op   the Load memop
 * @param def  the definition that replaces the Load's result
824  */
825 static void mark_replace_load(memop_t *op, ir_node *def)
826 {
827         op->replace = def;
828         op->flags |= FLAG_KILLED_NODE;
829         env.changed = 1;
830 }
831
832 /**
833  * Mark a Store memop to be removed.
834  *
835  * @param op  the Store memop
836  */
837 static void mark_remove_store(memop_t *op)
838 {
839         op->flags |= FLAG_KILLED_NODE;
840         env.changed = 1;
841 }
842
843 /**
844  * Update a memop for a Load.
845  *
846  * @param m  the memop
847  */
848 static void update_Load_memop(memop_t *m)
849 {
850         ir_node   *load = m->node;
851         ir_node   *ptr;
852         ir_entity *ent;
853
854         if (get_Load_volatility(load) == volatility_is_volatile)
855                 m->flags |= FLAG_IGNORE;
856
857         ptr = get_Load_ptr(load);
858
859         m->value.address = ptr;
860
861         for (unsigned i = get_irn_n_outs(load); i-- > 0; ) {
862                 ir_node *proj = get_irn_out(load, i);
863                 long    pn;
864
865                 /* beware of keep edges */
866                 if (is_End(proj))
867                         continue;
868
869                 pn = get_Proj_proj(proj);
870                 m->projs[pn] = proj;
871                 switch (pn) {
872                 case pn_Load_res:
873                         m->value.value = proj;
874                         m->value.mode  = get_irn_mode(proj);
875                         break;
876                 case pn_Load_X_except:
877                         m->flags |= FLAG_EXCEPTION;
878                         break;
879                 case pn_Load_M:
880                         m->mem = proj;
881                         break;
882                 case pn_Load_X_regular:
883                         break;
884                 default:
885                         panic("Unsupported Proj from Load %+F", proj);
886                 }
887         }
888
889         /* check if we can determine the entity that will be loaded */
890         ent = find_constant_entity(ptr);
891
892         if (ent != NULL && get_entity_visibility(ent) != ir_visibility_external) {
893                 /* a static allocation that is not external: there should be NO exception
894                  * when loading even if we cannot replace the load itself. */
895                 ir_node *value = NULL;
896
897                 /* no exception, clear the m fields as they might be checked again later */
898                 if (m->projs[pn_Load_X_except]) {
899                         ir_graph *irg = get_irn_irg(ptr);
900                         exchange(m->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
901                         m->projs[pn_Load_X_except] = NULL;
902                         m->flags &= ~FLAG_EXCEPTION;
903                         env.changed = 1;
904                 }
905                 if (m->projs[pn_Load_X_regular]) {
906                         exchange(m->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
907                         m->projs[pn_Load_X_regular] = NULL;
908                         env.changed = 1;
909                 }
910
911                 if (get_entity_linkage(ent) & IR_LINKAGE_CONSTANT) {
912                         if (ent->initializer) {
913                                 /* new style initializer */
914                                 value = find_compound_ent_value(ptr);
915                         }
916                         if (value != NULL)
917                                 value = can_replace_load_by_const(load, value);
918                 }
919
920                 if (value != NULL) {
921                         /* we completely replace the load by this value */
922                         DB((dbg, LEVEL_1, "Replacing Load %+F by constant %+F\n", m->node, value));
923                         mark_replace_load(m, value);
924                         return;
925                 }
926         }
927
928         if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
929                 /* only create an address if this node is NOT killed immediately or ignored */
930                 m->value.id = register_address(ptr);
931                 ++env.n_mem_ops;
932         } else {
933                 /* no user, KILL it */
934                 mark_replace_load(m, NULL);
935         }
936 }
937
938 /**
939  * Update a memop for a Store.
940  *
941  * @param m  the memop
942  */
943 static void update_Store_memop(memop_t *m)
944 {
945         ir_node *store = m->node;
946         ir_node *adr   = get_Store_ptr(store);
947
948         if (get_Store_volatility(store) == volatility_is_volatile) {
949                 m->flags |= FLAG_IGNORE;
950         } else {
951                 /* only create an address if this node is NOT ignored */
952                 m->value.id = register_address(adr);
953                 ++env.n_mem_ops;
954         }
955
956         m->value.address = adr;
957
958         for (unsigned i = get_irn_n_outs(store); i-- > 0; ) {
959                 ir_node *proj = get_irn_out(store, i);
960                 long    pn;
961
962                 /* beware of keep edges */
963                 if (is_End(proj))
964                         continue;
965
966                 pn = get_Proj_proj(proj);
967                 m->projs[pn] = proj;
968                 switch (pn) {
969                 case pn_Store_X_except:
970                         m->flags |= FLAG_EXCEPTION;
971                         break;
972                 case pn_Store_M:
973                         m->mem = proj;
974                         break;
975                 case pn_Store_X_regular:
976                         break;
977                 default:
978                         panic("Unsupported Proj from Store %+F", proj);
979                 }
980         }
981         m->value.value = get_Store_value(store);
982         m->value.mode  = get_irn_mode(m->value.value);
983 }
984
985 /**
986  * Update a memop for a Call.
987  *
988  * @param m  the memop
989  */
990 static void update_Call_memop(memop_t *m)
991 {
992         ir_node  *call = m->node;
993         unsigned prop  = get_Call_memory_properties(call);
994
995         if (prop & mtp_property_const) {
996                 /* A constant call does NOT use memory at all, so we
997                    can kick it from the list. */
998         } else if (prop & mtp_property_pure) {
999                 /* pure calls READ memory */
1000                 m->flags = 0;
1001         } else
1002                 m->flags = FLAG_KILL_ALL;
1003
1004         for (unsigned i = get_irn_n_outs(call); i-- > 0; ) {
1005                 ir_node *proj = get_irn_out(call, i);
1006
1007                 /* beware of keep edges */
1008                 if (is_End(proj))
1009                         continue;
1010
1011                 switch (get_Proj_proj(proj)) {
1012                 case pn_Call_X_except:
1013                         m->flags |= FLAG_EXCEPTION;
1014                         break;
1015                 case pn_Call_M:
1016                         m->mem = proj;
1017                         break;
1018                 }
1019         }
1020 }
1021
1022 /**
1023  * Update a memop for a Div/Mod.
1024  *
1025  * @param m  the memop
1026  */
1027 static void update_Div_memop(memop_t *m)
1028 {
1029         ir_node *div = m->node;
1030
1031         for (unsigned i = get_irn_n_outs(div); i-- > 0; ) {
1032                 ir_node *proj = get_irn_out(div, i);
1033
1034                 /* beware of keep edges */
1035                 if (is_End(proj))
1036                         continue;
1037
1038                 switch (get_Proj_proj(proj)) {
1039                 case pn_Div_X_except:
1040                         m->flags |= FLAG_EXCEPTION;
1041                         break;
1042                 case pn_Div_M:
1043                         m->mem = proj;
1044                         break;
1045                 }
1046         }
1047 }
1048
1049 static void update_Mod_memop(memop_t *m)
1050 {
1051         ir_node *div = m->node;
1052
1053         for (unsigned i = get_irn_n_outs(div); i-- > 0; ) {
1054                 ir_node *proj = get_irn_out(div, i);
1055
1056                 /* beware of keep edges */
1057                 if (is_End(proj))
1058                         continue;
1059
1060                 switch (get_Proj_proj(proj)) {
1061                 case pn_Mod_X_except:
1062                         m->flags |= FLAG_EXCEPTION;
1063                         break;
1064                 case pn_Mod_M:
1065                         m->mem = proj;
1066                         break;
1067                 }
1068         }
1069 }
1070
1071 /**
1072  * Update a memop for a Phi.
1073  *
1074  * @param m  the memop
1075  */
1076 static void update_Phi_memop(memop_t *m)
1077 {
1078         /* the Phi is its own mem */
1079         m->mem = m->node;
1080 }
1081
1082 /**
1083  * Memory walker: collect all memory ops and build topological lists.
1084  */
1085 static void collect_memops(ir_node *irn, void *ctx)
1086 {
1087         memop_t  *op;
1088         ir_node  *block;
1089         block_t  *entry;
1090
1091         (void) ctx;
1092         if (is_Proj(irn)) {
1093                 /* we can safely ignore ProjM's except the initial memory */
1094                 ir_graph *irg = get_irn_irg(irn);
1095                 if (irn != get_irg_initial_mem(irg))
1096                         return;
1097         }
1098
1099         op    = alloc_memop(irn);
1100         block = get_nodes_block(irn);
1101         entry = get_block_entry(block);
1102
1103         if (is_Phi(irn)) {
1104                 update_Phi_memop(op);
1105                 /* Phis must always be placed first */
1106                 op->next = entry->memop_forward;
1107                 entry->memop_forward = op;
1108                 if (entry->memop_backward == NULL)
1109                         entry->memop_backward = op;
1110         } else {
1111                 switch (get_irn_opcode(irn)) {
1112                 case iro_Load:
1113                         update_Load_memop(op);
1114                         break;
1115                 case iro_Store:
1116                         update_Store_memop(op);
1117                         break;
1118                 case iro_Call:
1119                         update_Call_memop(op);
1120                         break;
1121                 case iro_Sync:
1122                 case iro_Pin:
1123                         op->mem = irn;
1124                         break;
1125                 case iro_Proj:
1126                         /* initial memory */
1127                         op->mem = irn;
1128                         break;
1129                 case iro_Return:
1130                 case iro_End:
1131                         /* we can use those to find the memory edge */
1132                         break;
1133                 case iro_Div:
1134                         update_Div_memop(op);
1135                         break;
1136                 case iro_Mod:
1137                         update_Mod_memop(op);
1138                         break;
1139
1140                 case iro_Builtin:
1141                         /* TODO: handle some builtins */
1142                 default:
1143                         /* unsupported operation */
1144                         op->flags = FLAG_KILL_ALL;
1145                 }
1146
1147
1148                 /* all others should be placed last */
1149                 if (entry->memop_backward == NULL) {
1150                         entry->memop_forward = entry->memop_backward = op;
1151                 } else {
1152                         entry->memop_backward->next = op;
1153                         entry->memop_backward       = op;
1154                 }
1155         }
1156 }
1157
1158 /**
1159  * Find an address in the current set.
1160  *
1161  * @param value  the value to be searched for
1162  *
1163  * @return a memop for the value or NULL if the value does
1164  *         not exist in the set or cannot be converted into
1165  *         the requested mode
1166  */
1167 static memop_t *find_address(const value_t *value)
1168 {
1169         if (rbitset_is_set(env.curr_set, value->id)) {
1170                 memop_t *res = env.curr_id_2_memop[value->id];
1171
1172                 if (res->value.mode == value->mode)
1173                         return res;
1174                 /* allow hidden casts */
1175                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1176                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1177                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1178                         return res;
1179         }
1180         return NULL;
1181 }
1182
1183 /**
1184  * Find an address in the avail_out set.
1185  *
1186  * @param bl     the block
1187  */
1188 static memop_t *find_address_avail(const block_t *bl, unsigned id, const ir_mode *mode)
1189 {
1190         if (rbitset_is_set(bl->avail_out, id)) {
1191                 memop_t *res = bl->id_2_memop_avail[id];
1192
1193                 if (res->value.mode == mode)
1194                         return res;
1195                 /* allow hidden casts */
1196                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1197                     get_mode_arithmetic(mode) == irma_twos_complement &&
1198                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(mode))
1199                         return res;
1200         }
1201         return NULL;
1202 }
1203
1204 /**
1205  * Kill all addresses from the current set.
1206  */
1207 static void kill_all(void)
1208 {
1209         rbitset_clear_all(env.curr_set, env.rbs_size);
1210
1211         /* set sentinel */
1212         rbitset_set(env.curr_set, env.rbs_size - 1);
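        /* Keeping the sentinel set guarantees that rbitset_next(), which the scan
         * loops (e.g. in kill_memops()) call without an upper bound, always finds
         * a set bit; those loops stop once pos reaches rbs_size - 1. */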
1213 }
1214
1215 /**
1216  * Kill all memops in the current set that may alias the given Store value.
1217  *
1218  * @param value  the Store value
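 *
 * Every memop whose address cannot be proven ir_no_alias with the Store's
 * address is conservatively removed from the set.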
1219  */
1220 static void kill_memops(const value_t *value)
1221 {
1222         size_t end = env.rbs_size - 1;
1223         size_t pos;
1224
1225         for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1226                 memop_t *op = env.curr_id_2_memop[pos];
1227
1228                 if (ir_no_alias != get_alias_relation(value->address, value->mode,
1229                                                           op->value.address, op->value.mode)) {
1230                         rbitset_clear(env.curr_set, pos);
1231                         env.curr_id_2_memop[pos] = NULL;
1232                         DB((dbg, LEVEL_2, "KILLING %+F because of possible alias address %+F\n", op->node, value->address));
1233                 }
1234         }
1235 }
1236
1237 /**
1238  * Add the value of a memop to the current set.
1239  *
1240  * @param op  the memory op
1241  */
1242 static void add_memop(memop_t *op)
1243 {
1244         rbitset_set(env.curr_set, op->value.id);
1245         env.curr_id_2_memop[op->value.id] = op;
1246 }
1247
1248 /**
1249  * Add the value of a memop to the avail_out set.
1250  *
1251  * @param bl  the block
1252  * @param op  the memory op
1253  */
1254 static void add_memop_avail(block_t *bl, memop_t *op)
1255 {
1256         rbitset_set(bl->avail_out, op->value.id);
1257         bl->id_2_memop_avail[op->value.id] = op;
1258 }
1259
1260 /**
1261  * Check if we can convert a value of one mode to another mode
1262  * without changing the representation of bits.
1263  *
1264  * @param from  the original mode
1265  * @param to    the destination mode
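 *
 * Example: two integer modes of equal size (e.g. mode_Is and mode_Iu) can be
 * converted freely, while an integer and a float mode of equal size cannot.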
1266  */
1267 static int can_convert_to(const ir_mode *from, const ir_mode *to)
1268 {
1269         if (get_mode_arithmetic(from) == irma_twos_complement &&
1270             get_mode_arithmetic(to) == irma_twos_complement &&
1271             get_mode_size_bits(from) == get_mode_size_bits(to))
1272                 return 1;
1273         return 0;
1274 }
1275
1276 /**
1277  * Add a Conv to the requested mode if needed.
1278  *
1279  * @param irn   the IR-node to convert
1280  * @param mode  the destination mode
1281  *
1282  * @return the possibly converted node or NULL
1283  *         if the conversion is not possible
1284  */
1285 static ir_node *conv_to(ir_node *irn, ir_mode *mode)
1286 {
1287         ir_mode *other = get_irn_mode(irn);
1288         if (other != mode) {
1289                 /* different modes: check if conversion is possible without changing the bits */
1290                 if (can_convert_to(other, mode)) {
1291                         ir_node *block = get_nodes_block(irn);
1292                         return new_r_Conv(block, irn, mode);
1293                 }
1294                 /* otherwise not possible ... yet */
1295                 return NULL;
1296         }
1297         return irn;
1298 }
1299
1300 /**
1301  * Update the address of a value if this address was a load result
1302  * and the load is killed now.
1303  *
1304  * @param value  the value whose address is updated
1305  */
1306 static void update_address(value_t *value)
1307 {
1308         if (is_Proj(value->address)) {
1309                 ir_node *load = get_Proj_pred(value->address);
1310
1311                 if (is_Load(load)) {
1312                         const memop_t *op = get_irn_memop(load);
1313
1314                         if (op->flags & FLAG_KILLED_NODE)
1315                                 value->address = op->replace;
1316                 }
1317         }
1318 }
1319
1320 /**
1321  * Do forward dataflow analysis on the given block and calculate the
1322  * GEN and KILL in the current (avail) set.
1323  *
1324  * @param bl  the block
1325  */
1326 static void calc_gen_kill_avail(block_t *bl)
1327 {
1328         memop_t *op;
1329         ir_node *def;
1330
1331         for (op = bl->memop_forward; op != NULL; op = op->next) {
1332                 switch (get_irn_opcode(op->node)) {
1333                 case iro_Phi:
1334                         /* meet */
1335                         break;
1336                 case iro_Sync:
1337                         /* join */
1338                         break;
1339                 case iro_Load:
1340                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1341                                 /* do we have this already? */
1342                                 memop_t *other;
1343
1344                                 update_address(&op->value);
1345                                 other = find_address(&op->value);
1346                                 if (other != NULL && other != op) {
1347                                         def = conv_to(other->value.value, op->value.mode);
1348                                         if (def != NULL) {
1349 #ifdef DEBUG_libfirm
1350                                                 if (is_Store(other->node)) {
1351                                                         /* RAW */
1352                                                         DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
1353                                                 } else {
1354                                                         /* RAR */
1355                                                         DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
1356                                                 }
1357 #endif
1358                                                 mark_replace_load(op, def);
1359                                                 /* do NOT change the memop table */
1360                                                 continue;
1361                                         }
1362                                 }
1363                                 /* add this value */
1364                                 add_memop(op);
1365                         }
1366                         break;
1367                 case iro_Store:
1368                         if (! (op->flags & FLAG_KILLED_NODE)) {
1369                                 /* do we have this store already */
1370                                 memop_t *other;
1371
1372                                 update_address(&op->value);
1373                                 other = find_address(&op->value);
1374                                 if (other != NULL) {
1375                                         if (is_Store(other->node)) {
1376                                                 if (op != other && !(other->flags & FLAG_IGNORE) &&
1377                                                     get_nodes_block(other->node) == get_nodes_block(op->node)) {
1378                                                         /*
1379                                                          * For a WAW in the same block we can kick the first store.
1380                                                          * This is a shortcut: we know that the second Store will then be
1381                                                          * anticipated in any case.
1382                                                          */
1383                                                         DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", other->node, op->node));
1384                                                         mark_remove_store(other);
1385                                                         /* FIXME: a Load might get freed due to this killed store */
1386                                                 }
1387                                         } else if (other->value.value == op->value.value && !(op->flags & FLAG_IGNORE)) {
1388                                                 /* WAR */
1389                                                 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1390                                                 mark_remove_store(op);
1391                                                 /* do NOT change the memop table */
1392                                                 continue;
1393                                         }
1394                                 }
1395                                 /* KILL all possible aliases */
1396                                 kill_memops(&op->value);
1397                                 /* add this value */
1398                                 add_memop(op);
1399                         }
1400                         break;
1401                 default:
1402                         if (op->flags & FLAG_KILL_ALL)
1403                                 kill_all();
1404                 }
1405         }
1406 }
1407
1408 /**
1409  * Do forward dataflow analysis on a given block to calculate the avail_out set
1410  * for this block only.
1411  *
1412  * @param bl  the block
1413  */
1414 static void forward_avail(block_t *bl)
1415 {
1416         /* fill the data from the current block */
1417         env.curr_id_2_memop = bl->id_2_memop_avail;
1418         env.curr_set        = bl->avail_out;
1419
1420         calc_gen_kill_avail(bl);
1421         dump_curr(bl, "Avail_out");
1422 }
1423
1424 /**
1425  * Do backward dataflow analysis on a given block to calculate the antic set
1426  * of Loaded addresses.
1427  *
1428  * @param bl  the block
1429  *
1430  * @return non-zero if the set has changed since last iteration
1431  */
1432 static int backward_antic(block_t *bl)
1433 {
1434         memop_t *op;
1435         ir_node *block = bl->block;
1436         int     n = get_Block_n_cfg_outs(block);
1437
1438         if (n == 1) {
1439                 ir_node  *succ    = get_Block_cfg_out(block, 0);
1440                 block_t  *succ_bl = get_block_entry(succ);
1441                 int      pred_pos = get_Block_cfgpred_pos(succ, block);
1442                 size_t   end      = env.rbs_size - 1;
1443                 size_t   pos;
1444
1445                 kill_all();
1446
1447                 if (bl->trans_results == NULL) {
1448                         /* allocate the translate cache */
1449                         bl->trans_results = OALLOCNZ(&env.obst, memop_t*, env.curr_adr_id);
1450                 }
1451
1452                 /* check for partly redundant values */
1453                 for (pos = rbitset_next(succ_bl->anticL_in, 0, 1);
1454                      pos < end;
1455                      pos = rbitset_next(succ_bl->anticL_in, pos + 1, 1)) {
1456                         /*
1457                          * do Phi-translation here: Note that at this point the nodes are
1458                          * not changed, so we can safely cache the results.
1459                          * However: Loads of Load results ARE bad, because we have no way
1460                          * to translate them yet ...
1461                          */
1462                         memop_t *op = bl->trans_results[pos];
1463                         if (op == NULL) {
1464                                 /* not yet translated */
1465                                 ir_node *adr, *trans_adr;
1466
1467                                 op  = succ_bl->id_2_memop_antic[pos];
1468                                 adr = op->value.address;
1469
1470                                 trans_adr = phi_translate(adr, succ, pred_pos);
1471                                 if (trans_adr != adr) {
1472                                         /* create a new entry for the translated one */
1473                                         memop_t *new_op;
1474
1475                                         new_op = alloc_memop(NULL);
1476                                         new_op->value.address = trans_adr;
1477                                         new_op->value.id      = register_address(trans_adr);
1478                                         new_op->value.mode    = op->value.mode;
1479                                         new_op->node          = op->node; /* we need the node to decide if Load/Store */
1480                                         new_op->flags         = op->flags;
1481
1482                                         bl->trans_results[pos] = new_op;
1483                                         op = new_op;
1484                                 }
1485                         }
1486                         env.curr_id_2_memop[op->value.id] = op;
1487                         rbitset_set(env.curr_set, op->value.id);
1488                 }
1489         } else if (n > 1) {
1490                 ir_node *succ    = get_Block_cfg_out(block, 0);
1491                 block_t *succ_bl = get_block_entry(succ);
1492                 int i;
1493
1494                 rbitset_copy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1495                 memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1496
1497                 /* Hmm: probably we want to kill merges of Loads and Stores here */
1498                 for (i = n - 1; i > 0; --i) {
1499                         ir_node *succ    = get_Block_cfg_out(bl->block, i);
1500                         block_t *succ_bl = get_block_entry(succ);
1501
1502                         rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1503                 }
1504         } else {
1505                 /* block ends with a noreturn call */
1506                 kill_all();
1507         }
1508
1509         dump_curr(bl, "AnticL_out");
1510
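        /* Apply the block-local transfer function backwards over all memory ops:
         * a Load (not killed or ignored) makes its value anticipated, a Store kills
         * all entries that may alias its address, other ops with FLAG_KILL_ALL kill
         * everything; Phi and Sync are left untouched. */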
1511         for (op = bl->memop_backward; op != NULL; op = op->prev) {
1512                 switch (get_irn_opcode(op->node)) {
1513                 case iro_Phi:
1514                         /* meet */
1515                         break;
1516                 case iro_Sync:
1517                         /* join */
1518                         break;
1519                 case iro_Load:
1520                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1521                                 /* always add it */
1522                                 add_memop(op);
1523                         }
1524                         break;
1525                 case iro_Store:
1526                         if (! (op->flags & FLAG_KILLED_NODE)) {
1527                                 /* a Store: check which memops must be killed */
1528                                 kill_memops(&op->value);
1529                         }
1530                         break;
1531                 default:
1532                         if (op->flags & FLAG_KILL_ALL)
1533                                 kill_all();
1534                 }
1535         }
1536
1537         memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1538         if (! rbitsets_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1539                 /* changed */
1540                 rbitset_copy(bl->anticL_in, env.curr_set, env.rbs_size);
1541                 dump_curr(bl, "AnticL_in*");
1542                 return 1;
1543         }
1544         dump_curr(bl, "AnticL_in");
1545         return 0;
1546 }
1547
1548 /**
1549  * Replace a Load memop by an already known value.
1550  *
1551  * @param op  the Load memop
1552  */
1553 static void replace_load(memop_t *op)
1554 {
1555         ir_node *load = op->node;
1556         ir_node *def  = skip_Id(op->replace);
1557         ir_node *proj;
1558         ir_mode *mode;
1559
1560         if (def != NULL)
1561                 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1562         else {
1563                 if (op->flags & FLAG_EXCEPTION) {
1564                         /* bad: this node is unused and executed for exception only */
1565                         DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1566                         return;
1567                 }
1568                 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1569         }
1570
1571         if (op->mem != NULL) {
1572                 /* in rare cases a Load might have no memory Proj */
1573                 exchange(op->mem, get_Load_mem(load));
1574         }
1575         proj = op->projs[pn_Load_res];
1576         if (proj != NULL) {
1577                 mode = get_irn_mode(proj);
1578                 if (get_irn_mode(def) != mode) {
1579                         /* a hidden cast */
1580                         dbg_info *db    = get_irn_dbg_info(load);
1581                         ir_node  *block = get_nodes_block(proj);
1582                         def = new_rd_Conv(db, block, def, mode);
1583                 }
1584                 exchange(proj, def);
1585         }
1586         proj = op->projs[pn_Load_X_except];
1587         if (proj != NULL) {
1588                 ir_graph *irg = get_irn_irg(load);
1589                 exchange(proj, new_r_Bad(irg, mode_X));
1590         }
1591         proj = op->projs[pn_Load_X_regular];
1592         if (proj != NULL) {
1593                 exchange(proj, new_r_Jmp(get_nodes_block(load)));
1594         }
1595 }
1596
1597 /**
1598  * Remove a Store memop.
1599  *
1600  * @param op  the Store memop
1601  */
1602 static void remove_store(memop_t *op)
1603 {
1604         ir_node *store = op->node;
1605         ir_node *proj;
1606
1607         DB((dbg, LEVEL_1, "Removing %+F\n", store));
1608
1609         if (op->mem != NULL) {
1610                 /* in rare cases a Store might have no memory Proj */
1611                 exchange(op->mem, get_Store_mem(store));
1612         }
1613         proj = op->projs[pn_Store_X_except];
1614         if (proj != NULL) {
1615                 ir_graph *irg = get_irn_irg(store);
1616                 exchange(proj, new_r_Bad(irg, mode_X));
1617         }
1618         proj = op->projs[pn_Store_X_regular];
1619         if (proj != NULL) {
1620                 exchange(proj, new_r_Jmp(get_nodes_block(store)));
1621         }
1622 }
1623
1624
1625 /**
1626  * Do all necessary replacements for a given block.
1627  *
1628  * @param bl  the block
1629  */
1630 static void do_replacements(block_t *bl)
1631 {
1632         memop_t *op;
1633
1634         for (op = bl->memop_forward; op != NULL; op = op->next) {
1635                 if (op->flags & FLAG_KILLED_NODE) {
1636                         switch (get_irn_opcode(op->node)) {
1637                         case iro_Load:
1638                                 replace_load(op);
1639                                 break;
1640                         case iro_Store:
1641                                 remove_store(op);
1642                                 break;
1643                         }
1644                 }
1645         }
1646 }
1647
1648 /**
1649  * Calculate the Avail_out sets for all basic blocks.
1650  */
1651 static void calcAvail(void)
1652 {
1653         memop_t  **tmp_memop = env.curr_id_2_memop;
1654         unsigned *tmp_set    = env.curr_set;
1655         block_t  *bl;
1656
1657         /* calculate avail_out */
1658         DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1659
1660         /* iterate over all blocks in any order, skip the start block */
1661         for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1662                 forward_avail(bl);
1663         }
1664
1665         /* restore the current sets */
1666         env.curr_id_2_memop = tmp_memop;
1667         env.curr_set        = tmp_set;
1668 }
1669
1670 /**
1671  * Calculate the Antic_in sets for all basic blocks.
1672  */
1673 static void calcAntic(void)
1674 {
1675         int i, need_iter;
1676
1677         /* calculate antic_out */
1678         DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1679         i = 0;
1680         do {
1681                 block_t *bl;
1682
1683                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1684
1685                 need_iter = 0;
1686
1687                 /* over all blocks in reverse post order */
1688                 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1689                         need_iter |= backward_antic(bl);
1690                 }
1691                 ++i;
1692         } while (need_iter);
1693         DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1694 }
1695
1696 /**
1697  * Return the node representing the last memory in a block.
1698  *
1699  * @param bl  the block
1700  */
1701 static ir_node *find_last_memory(block_t *bl)
1702 {
1703         for (;;) {
1704                 if (bl->memop_backward != NULL) {
1705                         return bl->memop_backward->mem;
1706                 }
1707                 /* if there is NO memory in this block, go to the dominator */
1708                 bl = get_block_entry(get_Block_idom(bl->block));
1709         }
1710 }
1711
1712 /**
1713  * Reroute all memory users of old memory
1714  * to a new memory IR-node.
1715  *
1716  * @param omem  the old memory IR-node
1717  * @param nmem  the new memory IR-node
1718  */
1719 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem)
1720 {
1721         for (unsigned i = get_irn_n_outs(omem); i-- > 0; ) {
1722                 int     n_pos;
1723                 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1724
1725                 set_irn_n(user, n_pos, nmem);
1726         }
1727
1728         /* all edges previously point to omem now point to nmem */
1729         nmem->o.out = omem->o.out;
1730 }
1731
1732 /**
1733  * Reroute memory users of old memory that are dominated by a given block
1734  * to a new memory IR-node.
1735  *
1736  * @param omem     the old memory IR-node
1737  * @param nmem     the new memory IR-node
1738  * @param pass_bl  the block the memory must pass
1739  */
1740 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl)
1741 {
1742         unsigned n = get_irn_n_outs(omem);
1743         ir_def_use_edges *new_out = OALLOCF(&env.obst, ir_def_use_edges, edges, n);
1744
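        /* Walk all users of omem and rewire only those dominated by pass_bl to nmem;
         * the rewired edges are collected so nmem gets its own out array. */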
1745         unsigned j = 0;
1746         for (unsigned i = 0; i < n; ++i) {
1747                 int     n_pos;
1748                 ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
1749                 ir_node *use_bl = get_nodes_block(user);
1750
1751
1752                 if (is_Phi(user)) {
1753                         use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1754                 }
1755                 if (block_dominates(pass_bl, use_bl)) {
1756                         /* found a user that is dominated */
1757                         new_out->edges[j].pos = n_pos;
1758                         new_out->edges[j].use = user;
1759                         ++j;
1760
1761                         set_irn_n(user, n_pos, nmem);
1762                 }
1763         }
1764         new_out->n_edges = j;
1765
1766         /* Modify the out structure: we create a new out edge array on our
1767            temporary obstack here. This should be no problem, as we invalidate the
1768            edges at the end anyway. */
1769         /* first entry is used for the length */
1770         nmem->o.out = new_out;
1771 }
1772
1773 /**
1774  * Insert Loads, making partly redundant Loads fully redundant.
1775  */
1776 static int insert_Load(block_t *bl)
1777 {
1778         ir_node  *block = bl->block;
1779         int      i, n = get_Block_n_cfgpreds(block);
1780         size_t   end = env.rbs_size - 1;
1781
1782         DB((dbg, LEVEL_3, "processing %+F\n", block));
1783
1784         if (n == 0) {
1785                 /* might still happen for an unreachable block (end for instance) */
1786                 return 0;
1787         }
1788
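        /* Two phases: first compute avail_in as the meet of all predecessor avail_out
         * sets (building data Phis where the incoming values differ), then, for Loads
         * that are anticipated but not yet available, insert Loads into the
         * predecessors that lack the value so the original becomes fully redundant. */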
1789         if (n > 1) {
1790                 ir_node **ins;
1791                 size_t    pos;
1792
1793                 NEW_ARR_A(ir_node *, ins, n);
1794
1795                 rbitset_set_all(env.curr_set, env.rbs_size);
1796
1797                 /* More than one predecessor: calculate the join of all avail_outs, ignoring
1798                    unevaluated blocks. These are Top anyway. */
1799                 for (i = n - 1; i >= 0; --i) {
1800                         ir_node *pred = skip_Proj(get_Block_cfgpred(block, i));
1801                         ir_node *blk  = get_nodes_block(pred);
1802                         block_t *pred_bl;
1803
1804                         pred_bl = get_block_entry(blk);
1805                         rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
1806
1807                         if (is_Load(pred) || is_Store(pred)) {
1808                                 /* We reached this block by an exception from a Load or Store:
1809                                  * the memop creating the exception was NOT completed then, so kill it
1810                                  */
1811                                 memop_t *exc_op = get_irn_memop(pred);
1812                                 rbitset_clear(env.curr_set, exc_op->value.id);
1813                         }
1814
1815                 }
1816                 /*
1817                  * Ensure that all values are in the map: build Phi's if necessary:
1818                  * Note: the last bit is the sentinel and ALWAYS set, so stop one entry before the end.
1819                  */
1820                 for (pos = 0; pos < env.rbs_size - 1; ++pos) {
1821                         if (! rbitset_is_set(env.curr_set, pos))
1822                                 env.curr_id_2_memop[pos] = NULL;
1823                         else {
1824                                 int      need_phi = 0;
1825                                 memop_t *first    = NULL;
1826                                 ir_mode *mode     = NULL;
1827
1828                                 for (i = 0; i < n; ++i) {
1829                                         ir_node *pred    = get_Block_cfgpred_block(bl->block, i);
1830                                         block_t *pred_bl = get_block_entry(pred);
1831
1832                                         memop_t *mop = pred_bl->id_2_memop_avail[pos];
1833                                         if (first == NULL) {
1834                                                 first = mop;
1835                                                 ins[0] = first->value.value;
1836                                                 mode = get_irn_mode(ins[0]);
1837
1838                                                 /* no Phi needed so far */
1839                                                 env.curr_id_2_memop[pos] = first;
1840                                         } else {
1841                                                 ins[i] = conv_to(mop->value.value, mode);
1842                                                 if (ins[i] != ins[0]) {
1843                                                         if (ins[i] == NULL) {
1844                                                                 /* conversion failed */
1845                                                                 env.curr_id_2_memop[pos] = NULL;
1846                                                                 rbitset_clear(env.curr_set, pos);
1847                                                                 break;
1848                                                         }
1849                                                         need_phi = 1;
1850                                                 }
1851                                         }
1852                                 }
1853                                 if (need_phi) {
1854                                         /* build a Phi  */
1855                                         ir_node *phi = new_r_Phi(bl->block, n, ins, mode);
1856                                         memop_t *phiop = alloc_memop(phi);
1857
1858                                         phiop->value = first->value;
1859                                         phiop->value.value = phi;
1860
1861                                         /* no need to link it in, as it is a DATA phi */
1862
1863                                         env.curr_id_2_memop[pos] = phiop;
1864
1865                                         DB((dbg, LEVEL_3, "Created new %+F on merging value for address %+F\n", phi, first->value.address));
1866                                 }
1867                         }
1868                 }
1869         } else {
1870                 /* only one predecessor, simply copy the map */
1871                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1872                 block_t *pred_bl = get_block_entry(pred);
1873
1874                 rbitset_copy(env.curr_set, pred_bl->avail_out, env.rbs_size);
1875
1876                 memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
1877         }
1878
1879         if (n > 1) {
1880                 size_t pos;
1881
1882                 /* check for partly redundant values */
1883                 for (pos = rbitset_next(bl->anticL_in, 0, 1);
1884                      pos < end;
1885                      pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
1886                         memop_t *op = bl->id_2_memop_antic[pos];
1887                         int     have_some, all_same;
1888                         ir_node *first;
1889
1890                         if (rbitset_is_set(env.curr_set, pos)) {
1891                                 /* already avail */
1892                                 continue;
1893                         }
1894
1895                         assert(is_Load(op->node));
1896
1897                         DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
1898
1899                         have_some  = 0;
1900                         all_same   = 1;
1901                         first      = NULL;
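                        /* have_some: at least one predecessor already provides the value;
                         * all_same: every examined predecessor provides the very same node,
                         * in which case neither a Phi nor a new Load is needed. */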
1902                         for (i = n - 1; i >= 0; --i) {
1903                                 ir_node *pred    = get_Block_cfgpred_block(block, i);
1904                                 block_t *pred_bl = get_block_entry(pred);
1905                                 ir_mode *mode    = op->value.mode;
1906                                 memop_t *e;
1907                                 ir_node *adr;
1908
1909                                 adr = phi_translate(op->value.address, block, i);
1910                                 DB((dbg, LEVEL_3, ".. using address %+F in pred %d\n", adr, i));
1911                                 e   = find_address_avail(pred_bl, register_address(adr), mode);
1912                                 if (e == NULL) {
1913                                         ir_node *ef_block = get_nodes_block(adr);
1914                                         if (! block_dominates(ef_block, pred)) {
1915                                                 /* cannot place a copy here */
1916                                                 have_some = 0;
1917                                                 DB((dbg, LEVEL_3, "%+F cannot be moved into predecessor %+F\n", op->node, pred));
1918                                                 break;
1919                                         }
1920                                         DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
1921                                         pred_bl->avail = NULL;
1922                                         all_same       = 0;
1923                                 } else {
1924                                         if (e->value.mode != mode && !can_convert_to(e->value.mode, mode)) {
1925                                                 /* cannot create a Phi due to different modes */
1926                                                 have_some = 0;
1927                                                 break;
1928                                         }
1929                                         pred_bl->avail = e;
1930                                         have_some      = 1;
1931                                         DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
1932                                         if (first == NULL)
1933                                                 first = e->node;
1934                                         else if (first != e->node)
1935                                                 all_same = 0;
1936                                 }
1937                         }
1938                         if (have_some && !all_same) {
1939                                 ir_mode *mode = op->value.mode;
1940                                 ir_node **in, *phi;
1941                                 memop_t *phi_op;
1942
1943                                 NEW_ARR_A(ir_node *, in, n);
1944
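                                /* For every predecessor that does not provide the value, a new Load
                                 * is created at the end of that predecessor, its memory Proj is
                                 * threaded into the existing memory flow, and the new result is
                                 * recorded as available there. */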
1945                                 for (i = n - 1; i >= 0; --i) {
1946                                         ir_node *pred    = get_Block_cfgpred_block(block, i);
1947                                         block_t *pred_bl = get_block_entry(pred);
1948
1949                                         if (pred_bl->avail == NULL) {
1950                                                 /* create a new Load here to make it fully redundant */
1951                                                 dbg_info *db       = get_irn_dbg_info(op->node);
1952                                                 ir_node  *last_mem = find_last_memory(pred_bl);
1953                                                 ir_node  *load, *def, *adr;
1954                                                 memop_t  *new_op;
1955
1956                                                 assert(last_mem != NULL);
1957                                                 adr  = phi_translate(op->value.address, block, i);
1958                                                 load = new_rd_Load(db, pred, last_mem, adr, mode, cons_none);
1959                                                 def  = new_r_Proj(load, mode, pn_Load_res);
1960                                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));
1961
1962                                                 new_op                = alloc_memop(load);
1963                                                 new_op->mem           = new_r_Proj(load, mode_M, pn_Load_M);
1964                                                 new_op->value.address = adr;
1965                                                 new_op->value.id      = op->value.id;
1966                                                 new_op->value.mode    = mode;
1967                                                 new_op->value.value   = def;
1968
1969                                                 new_op->projs[pn_Load_M]   = new_op->mem;
1970                                                 new_op->projs[pn_Load_res] = def;
1971
1972                                                 new_op->prev = pred_bl->memop_backward;
1973                                                 if (pred_bl->memop_backward != NULL)
1974                                                         pred_bl->memop_backward->next = new_op;
1975
1976                                                 pred_bl->memop_backward = new_op;
1977
1978                                                 if (pred_bl->memop_forward == NULL)
1979                                                         pred_bl->memop_forward = new_op;
1980
1981                                                 if (get_nodes_block(last_mem) == pred) {
1982                                                         /* We have added a new last memory op in the pred block.
1983                                                            If pred already had a last mem, reroute all memory
1984                                                            users. */
1985                                                         reroute_all_mem_users(last_mem, new_op->mem);
1986                                                 } else {
1987                                                 /* reroute only the memory going through the pred block */
1988                                                         reroute_mem_through(last_mem, new_op->mem, pred);
1989                                                 }
1990
1991                                                 /* we added this load at the end, so it will be avail anyway */
1992                                                 add_memop_avail(pred_bl, new_op);
1993                                                 pred_bl->avail = new_op;
1994                                         }
1995                                         in[i] = conv_to(pred_bl->avail->value.value, mode);
1996                                 }
1997                                 phi = new_r_Phi(block, n, in, mode);
1998                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));
1999
2000                                 phi_op = clone_memop_phi(op, phi);
2001                                 add_memop(phi_op);
2002                         }
2003                 }
2004         }
2005
2006         /* recalculate avail by gen and kill */
2007         calc_gen_kill_avail(bl);
2008
2009         /* always update the map after gen/kill, as values might have been changed due to RAR/WAR/WAW */
2010         memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2011
2012         if (!rbitsets_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
2013                 /* the avail set has changed */
2014                 rbitset_copy(bl->avail_out, env.curr_set, env.rbs_size);
2015                 dump_curr(bl, "Avail_out*");
2016                 return 1;
2017         }
2018         dump_curr(bl, "Avail_out");
2019         return 0;
2020 }
2021
2022 /**
2023  * Insert Loads upwards.
2024  */
2025 static void insert_Loads_upwards(void)
2026 {
2027         int i, need_iter;
2028         block_t *bl;
2029
2030         /* recalculate antic_out and insert Loads */
2031         DB((dbg, LEVEL_2, "Inserting Loads\n"));
2032
2033         i = 0;
2034         do {
2035                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
2036
2037                 need_iter = 0;
2038
2039                 /* over all blocks in reverse post order, skip the start block */
2040                 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
2041                         need_iter |= insert_Load(bl);
2042                 }
2043                 ++i;
2044         } while (need_iter);
2045
2046         DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
2047 }
2048
2049 void opt_ldst(ir_graph *irg)
2050 {
2051         block_t *bl;
2052
2053         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
2054
2055         DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
2056
2057         assure_irg_properties(irg,
2058                 IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES /* we need landing pads */
2059                 | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE
2060                 | IR_GRAPH_PROPERTY_CONSISTENT_OUTS
2061                 | IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
2062                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
2063
2064         if (get_opt_alias_analysis()) {
2065                 assure_irp_globals_entity_usage_computed();
2066         }
2067
2068         obstack_init(&env.obst);
2069         ir_nodehashmap_init(&env.adr_map);
2070
2071         env.forward       = NULL;
2072         env.backward      = NULL;
2073         env.curr_adr_id   = 0;
2074         env.n_mem_ops     = 0;
2075         env.max_cfg_preds = 0;
2076         env.changed       = 0;
2077         env.start_bl      = get_irg_start_block(irg);
2078         env.end_bl        = get_irg_end_block(irg);
2079 #ifdef DEBUG_libfirm
2080         env.id_2_address  = NEW_ARR_F(ir_node *, 0);
2081 #endif
2082
2083         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2084
2085         /* first step: allocate block entries. Note that some blocks might be
2086            unreachable here. Using the normal walk ensures that ALL blocks are initialized. */
2087         irg_walk_graph(irg, prepare_blocks, link_phis, NULL);
2088
2089         /* produce an inverse post-order list for the CFG: this links only reachable
2090            blocks */
2091         irg_out_block_walk(get_irg_start_block(irg), NULL, inverse_post_order, NULL);
2092
2093         if (! get_Block_mark(env.end_bl)) {
2094                 /*
2095                  * The end block is NOT reachable due to endless loops
2096                  * or no_return calls.
2097                  * Place the end block last.
2098                  * env.backward points to the last block in the list for this purpose.
2099                  */
2100                 env.backward->forward_next = get_block_entry(env.end_bl);
2101
2102                 set_Block_mark(env.end_bl, 1);
2103         }
2104
2105         /* second step: find and sort all memory ops */
2106         walk_memory_irg(irg, collect_memops, NULL, NULL);
2107
2108 #ifdef DEBUG_libfirm
2109         /* check that the backward map is correct */
2110         assert((unsigned)ARR_LEN(env.id_2_address) == env.curr_adr_id);
2111 #endif
2112
2113         if (env.n_mem_ops == 0) {
2114                 /* no memory ops */
2115                 goto no_changes;
2116         }
2117
2118         /* create the backward links. */
2119         env.backward = NULL;
2120         irg_block_walk_graph(irg, NULL, collect_backward, NULL);
2121
2122         /* link the end block in */
2123         bl = get_block_entry(env.end_bl);
2124         bl->backward_next = env.backward;
2125         env.backward      = bl;
2126
2127         /* check that we really start with the start / end block */
2128         assert(env.forward->block  == env.start_bl);
2129         assert(env.backward->block == env.end_bl);
2130
2131         /* create address sets: for now, only the already registered addresses are allowed,
2132            plus one extra bit for the sentinel */
2133         env.rbs_size = env.curr_adr_id + 1;
2134
2135         /* create the current set */
2136         env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2137         rbitset_set(env.curr_set, env.rbs_size - 1);
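        /* The highest bit is a sentinel that is always set: loops scanning a set with
         * rbitset_next() terminate as soon as they reach it. */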
2138         env.curr_id_2_memop = NEW_ARR_DZ(memop_t*, &env.obst, env.rbs_size);
2139
2140         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2141                 /* set sentinel bits */
2142                 bl->avail_out  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2143                 rbitset_set(bl->avail_out, env.rbs_size - 1);
2144
2145                 bl->id_2_memop_avail = NEW_ARR_DZ(memop_t*, &env.obst, env.rbs_size);
2146
2147                 bl->anticL_in  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2148                 rbitset_set(bl->anticL_in, env.rbs_size - 1);
2149
2150                 bl->id_2_memop_antic = NEW_ARR_DZ(memop_t*, &env.obst, env.rbs_size);
2151         }
2152
2153         (void) dump_block_list;
2154
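        /* third step: compute Avail_out for every block, iterate AnticL_in to a
           fixed point, then insert Loads upwards until nothing changes any more */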
2155         calcAvail();
2156         calcAntic();
2157
2158         insert_Loads_upwards();
2159
2160         if (env.changed) {
2161                 /* over all blocks in reverse post order */
2162                 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2163                         do_replacements(bl);
2164                 }
2165
2166                 /* not only invalidate but free the out edges: we might have allocated new
2167                    out arrays on our obstack, which is freed below. */
2168                 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
2169         } else {
2170 no_changes:
2171                 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
2172         }
2173
2174         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2175         ir_nodehashmap_destroy(&env.adr_map);
2176         obstack_free(&env.obst, NULL);
2177
2178 #ifdef DEBUG_libfirm
2179         DEL_ARR_F(env.id_2_address);
2180 #endif
2181 }
2182
2183 ir_graph_pass_t *opt_ldst_pass(const char *name)
2184 {
2185         return def_graph_pass(name ? name : "ldst_df", opt_ldst);
2186 }