- added Phi-translation for addresses
[libfirm] / ir / opt / opt_ldst.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Dataflow driven Load/Store optimizations, uses some ideas from
23  *          VanDrunen's LEPRE
24  * @author  Michael Beck
25  * @version $Id$
26  */
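
/*
 * Usage sketch (assumption: the public entry point of this pass is
 * opt_ldst() as declared in iroptimize.h; get_irp_n_irgs()/get_irp_irg()
 * are the usual libFirm program iteration helpers):
 *
 *     #include "irprog.h"
 *     #include "iroptimize.h"
 *
 *     int i;
 *     for (i = 0; i < get_irp_n_irgs(); ++i)
 *         opt_ldst(get_irp_irg(i));
 */
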
27 #include "config.h"
28
29 #include "irnode_t.h"
30 #include "irflag_t.h"
31 #include "array_t.h"
32 #include "ircons.h"
33 #include "irdom.h"
34 #include "irgmod.h"
35 #include "irgwalk.h"
36 #include "irouts.h"
37 #include "irgraph.h"
38 #include "irgopt.h"
39 #include "iropt.h"
40 #include "iroptimize.h"
41 #include "irnodemap.h"
42 #include "raw_bitset.h"
43 #include "debug.h"
44 #include "error.h"
45
46 /* maximum number of output Proj's */
47 #define MAX_PROJ (pn_Load_max > pn_Store_max ? pn_Load_max : pn_Store_max)
48
49 /**
50  * Maps an address to a dense ID.
51  */
52 typedef struct address_entry_t {
53         unsigned id;          /**< The ID */
54 } address_entry;
55
56 /**
57  * Memop-flags.
58  */
59 enum memop_flags {
60         FLAG_KILL_ALL    = 1, /**< KILL all addresses */
61         FLAG_KILLED_NODE = 2, /**< this node was killed */
62         FLAG_EXCEPTION   = 4, /**< this node has exception flow */
63         FLAG_IGNORE      = 8, /**< ignore this node (volatile or other) */
64 };
65
66 /**
67  * A value: This represents a value stored at a given address in
68  * memory. Do not confuse with values from value numbering.
69  */
70 typedef struct value_t value_t;
71 struct value_t {
72         ir_node  *address;    /**< the address of this value */
73         ir_node  *value;      /**< the value itself */
74         ir_mode  *mode;       /**< the mode of the value */
75         unsigned id;          /**< address id */
76 };
77
78 /**
79  * A memop describes a memory-related operation.
80  * These are Loads/Stores and all other ops that might modify
81  * memory (Calls, CopyB) or cause exceptions.
82  */
83 typedef struct memop_t memop_t;
84 struct memop_t {
85         value_t  value;      /**< the value of this memop: only defined for Load/Store */
86         ir_node  *node;      /**< the memory op itself */
87         ir_node  *mem;       /**< the memory FROM this node */
88         ir_node  *replace;   /**< the replacement node if this memop is replaced */
89         memop_t  *next;      /**< links to the next memory op in the block in forward order. */
90         memop_t  *prev;      /**< links to the previous memory op in the block in forward order. */
91         unsigned flags;      /**< memop flags */
92         ir_node  *projs[MAX_PROJ]; /**< Projs of this memory op */
93 };
94
95 /**
96  * Additional data for every basic block.
97  */
98 typedef struct block_t block_t;
99 struct block_t {
100         memop_t  *memop_forward;     /**< topologically sorted list of memory ops in this block */
101         memop_t  *memop_backward;    /**< last memop in the list */
102         unsigned *avail_out;         /**< out-set of available addresses */
103         memop_t  **id_2_memop_avail; /**< maps avail address ids to memops */
104         unsigned *anticL_in;         /**< in-set of anticipated Load addresses */
105         memop_t  **id_2_memop_antic; /**< maps anticipated address ids to memops */
106         ir_node  *block;             /**< the associated block */
107         block_t  *forward_next;      /**< next block entry for forward iteration */
108         block_t  *backward_next;     /**< next block entry for backward iteration */
109         memop_t  *avail;             /**< used locally for the avail map */
110 };
111
112 /**
113  * Metadata for this pass.
114  */
115 typedef struct ldst_env_t {
116         struct obstack  obst;              /**< obstack for temporary data */
117         ir_nodemap_t    adr_map;           /**< Maps addresses to address entries (ids) */
118         block_t         *forward;          /**< Inverse post-order list of all blocks Start->End */
119         block_t         *backward;         /**< Inverse post-order list of all blocks End->Start */
120         ir_node         *start_bl;         /**< start block of the current graph */
121         ir_node         *end_bl;           /**< end block of the current graph */
122         unsigned        *curr_set;         /**< current set of addresses */
123         memop_t         **curr_id_2_memop; /**< current map of address ids to memops */
124         unsigned        curr_adr_id;       /**< number for address mapping */
125         unsigned        n_mem_ops;         /**< number of memory operations (Loads/Stores) */
126         unsigned        rbs_size;          /**< size of all bitsets in bits */
127         int             max_cfg_preds;     /**< maximum number of block cfg predecessors */
128         int             changed;           /**< Flags for changed graph state */
129 #ifdef DEBUG_libfirm
130         ir_node         **id_2_address;    /**< maps an id to the used address */
131 #endif
132 } ldst_env;
133
134 /* the one and only environment */
135 static ldst_env env;
136
137 #ifdef DEBUG_libfirm
138
139 static firm_dbg_module_t *dbg;
140
141 /**
142  * Dumps the block list.
143  *
144  * @param ldst environment
145  * @param env  the ldst environment
146 static void dump_block_list(ldst_env *env) {
147         block_t *entry;
148         memop_t *op;
149         int     i;
150
151         for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
152                 DB((dbg, LEVEL_2, "%+F {", entry->block));
153
154                 i = 0;
155                 for (op = entry->memop_forward; op != NULL; op = op->next) {
156                         if (i == 0)
157                                 DB((dbg, LEVEL_2, "\n\t"));
158                         DB((dbg, LEVEL_2, "%+F", op->node));
159                         if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
160                                 DB((dbg, LEVEL_2, "X"));
161                         else if (op->flags & FLAG_KILL_ALL)
162                                 DB((dbg, LEVEL_2, "K"));
163                         DB((dbg, LEVEL_2, ", "));
164
165                         i = (i + 1) & 3;
166                 }
167                 DB((dbg, LEVEL_2, "\n}\n\n"));
168         }
169 }
170
171 /**
172  * Dumps the current set.
173  *
174  * @param bl   current block
175  * @param s    name of the set
176  */
177 static void dump_curr(block_t *bl, const char *s) {
178         unsigned pos = 0;
179         unsigned end = env.rbs_size - 1;
180         int      i;
181
182         DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
183         i = 0;
184         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
185                 memop_t *op = env.curr_id_2_memop[pos];
186
187                 if (i == 0) {
188                         DB((dbg, LEVEL_2, "\n\t"));
189                 }
190
191                 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
192                 i = (i + 1) & 3;
193         }
194         DB((dbg, LEVEL_2, "\n}\n"));
195 }
196
197 #else
198 #define dump_block_list(env)
199 #define dump_curr(bl, s)
200 #endif /* DEBUG_libfirm */
201
202 /** Get the block entry for a block node */
203 static block_t *get_block_entry(const ir_node *block) {
204         assert(is_Block(block));
205
206         return get_irn_link(block);
207 }
208
209 /** Get the memop entry for a memory operation node */
210 static memop_t *get_irn_memop(const ir_node *irn) {
211         assert(! is_Block(irn));
212         return get_irn_link(irn);
213 }
214
215 /**
216  * Walk over the memory edges from definition to users.
217  * This ensures that even operations without a memory output are found.
218  *
219  * @param irn   start node
220  * @param pre   pre walker function
221  * @param post  post walker function
222  * @param ctx   context parameter for the walker functions
223  */
224 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
225         int     i;
226         ir_mode *mode;
227
228         mark_irn_visited(irn);
229
230         if (pre)
231                 pre(irn, ctx);
232
233         mode = get_irn_mode(irn);
234         if (mode == mode_M) {
235                 /* every successor uses memory */
236                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
237                         ir_node *succ = get_irn_out(irn, i);
238
239                         if (! irn_visited(succ))
240                                 walk_memory(succ, pre, post, ctx);
241                 }
242         } else if (mode == mode_T) {
243                 /* only some Projs use memory */
244                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
245                         ir_node *proj = get_irn_out(irn, i);
246
247                         if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
248                                 walk_memory(proj, pre, post, ctx);
249                 }
250         }
251         if (post)
252                 post(irn, ctx);
253 }
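
/*
 * Illustration (sketch) of the walk above: for a fragment like
 *
 *     m0   = initial memory                   (mode_M)
 *     call = Call(m0, ...)                    (mode_T)
 *     m1   = Proj(call, pn_Call_M_regular)    (mode_M)
 *     load = Load(m1, ptr)                    (mode_T)
 *     m2   = Proj(load, pn_Load_M)            (mode_M)
 *
 * the walk starts at m0, visits all its users (here the Call), descends from
 * the mode_T Call only into its memory Proj m1, and so reaches the Load and
 * every further memory user, although Call and Load are not of mode_M
 * themselves.
 */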
254
255 /**
256  * Walks over all memory nodes of a graph.
257  *
258  * @param irg   a graph
259  * @param pre   pre walker function
260  * @param post  post walker function
261  * @param ctx   context parameter for the walker functions
262  */
263 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
264         inc_irg_visited(irg);
265
266         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
267
268         /*
269          * there are two possible sources for memory: initial_mem and nomem.
270          * We ignore nomem as it should NOT change the memory.
271          */
272         walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
273
274         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
275 }
276
277 /**
278  * Walker: allocate a block entry for every block.
279  */
280 static void prepare_blocks(ir_node *block, void *ctx) {
281         (void)ctx;
282
283         if (is_Block(block)) {
284                 block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
285                 int     n;
286
287                 entry->memop_forward    = NULL;
288                 entry->memop_backward   = NULL;
289                 entry->avail_out        = NULL;
290                 entry->id_2_memop_avail = NULL;
291                 entry->anticL_in        = NULL;
292                 entry->id_2_memop_antic = NULL;
293                 entry->block            = block;
294                 entry->forward_next     = NULL;
295                 entry->backward_next    = NULL;
296                 entry->avail            = NULL;
297                 set_irn_link(block, entry);
298
299                 set_Block_phis(block, NULL);
300
301                 /* use block marks to track unreachable blocks */
302                 set_Block_mark(block, 0);
303
304                 n = get_Block_n_cfgpreds(block);
305                 if (n > env.max_cfg_preds)
306                         env.max_cfg_preds = n;
307         }
308 }
309
310 /**
311  * Post-walker: link in all Phis.
312  */
313 static void link_phis(ir_node *irn, void *ctx) {
314         (void)ctx;
315
316         if (is_Phi(irn)) {
317                 ir_node *block = get_nodes_block(irn);
318                 add_Block_phi(block, irn);
319         }
320 }
321
322 /**
323  * Block walker: creates the inverse post-order list for the CFG.
324  */
325 static void inverse_post_order(ir_node *block, void *ctx) {
326         block_t *entry = get_block_entry(block);
327
328         (void)ctx;
329
330         /* mark this block IS reachable from start */
331         set_Block_mark(block, 1);
332
333         /* create the list in inverse order */
334         entry->forward_next = env.forward;
335         env.forward         = entry;
336
337         /* remember the first visited (last in list) entry, needed for later */
338         if (env.backward == NULL)
339                 env.backward = entry;
340 }
341
342 /**
343  * Block walker: create backward links for the memops of a block.
344  */
345 static void collect_backward(ir_node *block, void *ctx) {
346         block_t *entry = get_block_entry(block);
347         memop_t *last, *op;
348
349         (void)ctx;
350
351         /*
352          * Do NOT link in the end block yet. We want it to be
353          * the first in the list. This is NOT guaranteed by the walker
354          * if we have endless loops.
355          */
356         if (block != env.end_bl) {
357                 entry->backward_next = env.backward;
358
359                 /* create the list in inverse order */
360                 env.backward = entry;
361         }
362
363         /* create backward links for all memory ops */
364         last = NULL;
365         for (op = entry->memop_forward; op != NULL; op = op->next) {
366                 op->prev = last;
367                 last     = op;
368         }
369         entry->memop_backward = last;
370 }
371
372 /**
373  * Allocate a memop.
374  *
375  * @param irn  the IR-node representing the memop
376  */
377 static memop_t *alloc_memop(ir_node *irn) {
378         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
379
380         m->value.address = NULL;
381         m->value.value   = NULL;
382         m->value.mode    = NULL;
383
384         m->node          = irn;
385         m->mem           = NULL;
386         m->replace       = NULL;
387         m->next          = NULL;
388         m->flags         = 0;
389
390         memset(m->projs, 0, sizeof(m->projs));
391
392         set_irn_link(irn, m);
393         return m;
394 }
395
396 /**
397  * Create a memop for a Phi-replacement.
398  *
399  * @param op   the memop to clone
400  * @param phi  the Phi-node representing the new value
401  */
402 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
403         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
404
405         m->value         = op->value;
406         m->value.value   = phi;
407
408         m->node          = phi;
409         m->replace       = NULL;
410         m->next          = NULL;
411         m->flags         = 0;
412
413         set_irn_link(phi, m);
414         return m;
415 }
416
417 /**
418  * Register an address and allocate an ID for it.
419  *
420  * @param adr  the IR-node representing the address
421  */
422 static unsigned register_address(ir_node *adr) {
423         address_entry *entry;
424
425         /* skip Confirms and Casts */
426 restart:
427         if (is_Confirm(adr)) {
428                 adr = get_Confirm_value(adr);
429                 goto restart;
430         }
431         if (is_Cast(adr)) {
432                 adr = get_Cast_op(adr);
433                 goto restart;
434         }
435
436         entry = ir_nodemap_get(&env.adr_map, adr);
437
438         if (entry == NULL) {
439                 /* new address */
440                 entry = obstack_alloc(&env.obst, sizeof(*entry));
441
442                 entry->id = env.curr_adr_id++;
443                 ir_nodemap_insert(&env.adr_map, adr, entry);
444
445                 DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
446 #ifdef DEBUG_libfirm
447                 ARR_APP1(ir_node *, env.id_2_address, adr);
448 #endif
449         }
450         return entry->id;
451 }
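
/*
 * Example (sketch): because Confirms and Casts are skipped above, all
 * syntactic wrappers of one address map to the same dense id, e.g.
 *
 *     register_address(p)
 *     register_address(Confirm(p, <rel>, q))
 *     register_address(Cast(p, tp))
 *
 * all return the same id for the address p, which keeps the bitsets small
 * and makes the avail/antic sets comparable across such wrappers.
 */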
452
453 /**
454  * Return the memory properties of a call node.
455  *
456  * @param call  the call node
457  *
458  * @return a bitset of mtp_property_const and mtp_property_pure
459  */
460 static unsigned get_Call_memory_properties(ir_node *call) {
461         ir_type *call_tp = get_Call_type(call);
462         unsigned prop = get_method_additional_properties(call_tp);
463
464         /* check first the call type */
465         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
466                 /* try the called entity */
467                 ir_node *ptr = get_Call_ptr(call);
468
469                 if (is_Global(ptr)) {
470                         ir_entity *ent = get_Global_entity(ptr);
471
472                         prop = get_entity_additional_properties(ent);
473                 }
474         }
475         return prop & (mtp_property_const|mtp_property_pure);
476 }
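
/*
 * Example (sketch) of the property distinction used above:
 *
 *     int square(int x)       { return x * x; }   -- const: touches no memory
 *     int deref(const int *p) { return *p;     }  -- pure:  only reads memory
 *
 * A const Call can be ignored by this pass entirely, while a pure Call still
 * counts as a (non-killing) memory read; everything else kills all known
 * addresses, see update_Call_memop() below.
 */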
477
478 /**
479  * Returns an entity if the address ptr points to a constant one.
480  *
481  * @param ptr  the address
482  *
483  * @return an entity or NULL
484  */
485 static ir_entity *find_constant_entity(ir_node *ptr)
486 {
487         for (;;) {
488                 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
489                         return get_SymConst_entity(ptr);
490                 } else if (is_Sel(ptr)) {
491                         ir_entity *ent = get_Sel_entity(ptr);
492                         ir_type   *tp  = get_entity_owner(ent);
493
494                         /* Do not fiddle with polymorphism. */
495                         if (is_Class_type(get_entity_owner(ent)) &&
496                                 ((get_entity_n_overwrites(ent)    != 0) ||
497                                 (get_entity_n_overwrittenby(ent) != 0)   ) )
498                                 return NULL;
499
500                         if (is_Array_type(tp)) {
501                                 /* check bounds */
502                                 int i, n;
503
504                                 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
505                                         ir_node *bound;
506                                         tarval *tlower, *tupper;
507                                         ir_node *index = get_Sel_index(ptr, i);
508                                         tarval *tv     = computed_value(index);
509
510                                         /* check if the index is constant */
511                                         if (tv == tarval_bad)
512                                                 return NULL;
513
514                                         bound  = get_array_lower_bound(tp, i);
515                                         tlower = computed_value(bound);
516                                         bound  = get_array_upper_bound(tp, i);
517                                         tupper = computed_value(bound);
518
519                                         if (tlower == tarval_bad || tupper == tarval_bad)
520                                                 return NULL;
521
522                                         if (tarval_cmp(tv, tlower) & pn_Cmp_Lt)
523                                                 return NULL;
524                                         if (tarval_cmp(tupper, tv) & pn_Cmp_Lt)
525                                                 return NULL;
526
527                                         /* ok, bounds check finished */
528                                 }
529                         }
530
531                         if (variability_constant == get_entity_variability(ent))
532                                 return ent;
533
534                         /* try next */
535                         ptr = get_Sel_ptr(ptr);
536                 } else if (is_Add(ptr)) {
537                         ir_node *l = get_Add_left(ptr);
538                         ir_node *r = get_Add_right(ptr);
539
540                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
541                                 ptr = l;
542                         else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
543                                 ptr = r;
544                         else
545                                 return NULL;
546
547                         /* for now, we support only one addition, reassoc should fold all others */
548                         if (! is_SymConst(ptr) && !is_Sel(ptr))
549                                 return NULL;
550                 } else if (is_Sub(ptr)) {
551                         ir_node *l = get_Sub_left(ptr);
552                         ir_node *r = get_Sub_right(ptr);
553
554                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
555                                 ptr = l;
556                         else
557                                 return NULL;
558                         /* for now, we support only one subtraction, reassoc should fold all others */
559                         if (! is_SymConst(ptr) && !is_Sel(ptr))
560                                 return NULL;
561                 } else
562                         return NULL;
563         }
564 }  /* find_constant_entity */
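
/*
 * Example (sketch): for C source like
 *
 *     static const int tab[4] = { 1, 2, 3, 5 };
 *     ... tab[2] ...
 *
 * the address is typically built as Sel(SymConst(&tab), 2) or, after
 * lowering, as Add(SymConst(&tab), Const 8).  find_constant_entity()
 * resolves both forms to the entity tab, after checking that the constant
 * index lies within the array bounds.
 */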
565
566 /**
567  * Return the selection index of a Sel node for dimension dim.
568  */
569 static long get_Sel_array_index_long(ir_node *n, int dim) {
570         ir_node *index = get_Sel_index(n, dim);
571         assert(is_Const(index));
572         return get_tarval_long(get_Const_tarval(index));
573 }  /* get_Sel_array_index_long */
574
575 /**
576  * Returns the accessed component graph path for a
577  * node computing an address.
578  *
579  * @param ptr    the node computing the address
580  * @param depth  current depth in steps upward from the root
581  *               of the address
582  */
583 static compound_graph_path *rec_get_accessed_path(ir_node *ptr, int depth) {
584         compound_graph_path *res = NULL;
585         ir_entity           *root, *field, *ent;
586         int                 path_len, pos, idx;
587         tarval              *tv;
588         ir_type             *tp;
589
590         if (is_SymConst(ptr)) {
591                 /* a SymConst. If the depth is 0, this is an access to a global
592                  * entity and we don't need a component path, else we know
593                  * at least its length.
594                  */
595                 assert(get_SymConst_kind(ptr) == symconst_addr_ent);
596                 root = get_SymConst_entity(ptr);
597                 res = (depth == 0) ? NULL : new_compound_graph_path(get_entity_type(root), depth);
598         } else if (is_Sel(ptr)) {
599                 /* it's a Sel, go up until we find the root */
600                 res = rec_get_accessed_path(get_Sel_ptr(ptr), depth+1);
601                 if (res == NULL)
602                         return NULL;
603
604                 /* fill up the step in the path at the current position */
605                 field    = get_Sel_entity(ptr);
606                 path_len = get_compound_graph_path_length(res);
607                 pos      = path_len - depth - 1;
608                 set_compound_graph_path_node(res, pos, field);
609
610                 if (is_Array_type(get_entity_owner(field))) {
611                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
612                         set_compound_graph_path_array_index(res, pos, get_Sel_array_index_long(ptr, 0));
613                 }
614         } else if (is_Add(ptr)) {
615                 ir_node *l    = get_Add_left(ptr);
616                 ir_node *r    = get_Add_right(ptr);
617                 ir_mode *mode = get_irn_mode(ptr);
618                 tarval  *tmp;
619
620                 if (is_Const(r) && get_irn_mode(l) == mode) {
621                         ptr = l;
622                         tv  = get_Const_tarval(r);
623                 } else {
624                         ptr = r;
625                         tv  = get_Const_tarval(l);
626                 }
627 ptr_arith:
628                 mode = get_tarval_mode(tv);
629                 tmp  = tv;
630
631                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
632                 if (is_Sel(ptr)) {
633                         field = get_Sel_entity(ptr);
634                 } else {
635                         field = get_SymConst_entity(ptr);
636                 }
637                 idx = 0;
638                 for (ent = field;;) {
639                         unsigned size;
640                         tarval   *sz, *tv_index, *tlower, *tupper;
641                         ir_node  *bound;
642
643                         tp = get_entity_type(ent);
644                         if (! is_Array_type(tp))
645                                 break;
646                         ent = get_array_element_entity(tp);
647                         size = get_type_size_bytes(get_entity_type(ent));
648                         sz   = new_tarval_from_long(size, mode);
649
650                         tv_index = tarval_div(tmp, sz);
651                         tmp      = tarval_mod(tmp, sz);
652
653                         if (tv_index == tarval_bad || tmp == tarval_bad)
654                                 return NULL;
655
656                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
657                         bound  = get_array_lower_bound(tp, 0);
658                         tlower = computed_value(bound);
659                         bound  = get_array_upper_bound(tp, 0);
660                         tupper = computed_value(bound);
661
662                         if (tlower == tarval_bad || tupper == tarval_bad)
663                                 return NULL;
664
665                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
666                                 return NULL;
667                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
668                                 return NULL;
669
670                         /* ok, bounds check finished */
671                         ++idx;
672                 }
673                 if (! tarval_is_null(tmp)) {
674                         /* access to some struct/union member */
675                         return NULL;
676                 }
677
678                 /* should be at least ONE array */
679                 if (idx == 0)
680                         return NULL;
681
682                 res = rec_get_accessed_path(ptr, depth + idx);
683                 if (res == NULL)
684                         return NULL;
685
686                 path_len = get_compound_graph_path_length(res);
687                 pos      = path_len - depth - idx;
688
689                 for (ent = field;;) {
690                         unsigned size;
691                         tarval   *sz, *tv_index;
692                         long     index;
693
694                         tp = get_entity_type(ent);
695                         if (! is_Array_type(tp))
696                                 break;
697                         ent = get_array_element_entity(tp);
698                         set_compound_graph_path_node(res, pos, ent);
699
700                         size = get_type_size_bytes(get_entity_type(ent));
701                         sz   = new_tarval_from_long(size, mode);
702
703                         tv_index = tarval_div(tv, sz);
704                         tv       = tarval_mod(tv, sz);
705
706                         /* worked above, should work again */
707                         assert(tv_index != tarval_bad && tv != tarval_bad);
708
709                         /* bounds already checked above */
710                         index = get_tarval_long(tv_index);
711                         set_compound_graph_path_array_index(res, pos, index);
712                         ++pos;
713                 }
714         } else if (is_Sub(ptr)) {
715                 ir_node *l = get_Sub_left(ptr);
716                 ir_node *r = get_Sub_right(ptr);
717
718                 ptr = l;
719                 tv  = get_Const_tarval(r);
720                 tv  = tarval_neg(tv);
721                 goto ptr_arith;
722         }
723         return res;
724 }  /* rec_get_accessed_path */
725
726 /**
727  * Returns an access path or NULL.  The access path is only
728  * valid if the graph is in phase_high and _no_ address computation is used.
729  */
730 static compound_graph_path *get_accessed_path(ir_node *ptr) {
731         compound_graph_path *gr = rec_get_accessed_path(ptr, 0);
732         return gr;
733 }  /* get_accessed_path */
734
735 typedef struct path_entry {
736         ir_entity         *ent;
737         struct path_entry *next;
738         long              index;
739 } path_entry;
740
741 static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next) {
742         path_entry       entry, *p;
743         ir_entity        *ent, *field;
744         ir_initializer_t *initializer;
745         tarval           *tv;
746         ir_type          *tp;
747         unsigned         n;
748
749         entry.next = next;
750         if (is_SymConst(ptr)) {
751                 /* found the root */
752                 ent         = get_SymConst_entity(ptr);
753                 initializer = get_entity_initializer(ent);
754                 for (p = next; p != NULL;) {
755                         if (initializer->kind != IR_INITIALIZER_COMPOUND)
756                                 return NULL;
757                         n  = get_initializer_compound_n_entries(initializer);
758                         tp = get_entity_type(ent);
759
760                         if (is_Array_type(tp)) {
761                                 ent = get_array_element_entity(tp);
762                                 if (ent != p->ent) {
763                                         /* a missing [0] */
764                                         if (0 >= n)
765                                                 return NULL;
766                                         initializer = get_initializer_compound_value(initializer, 0);
767                                         continue;
768                                 }
769                         }
770                         if (p->index >= (int) n)
771                                 return NULL;
772                         initializer = get_initializer_compound_value(initializer, p->index);
773
774                         ent = p->ent;
775                         p   = p->next;
776                 }
777                 tp = get_entity_type(ent);
778                 while (is_Array_type(tp)) {
779                         ent = get_array_element_entity(tp);
780                         tp = get_entity_type(ent);
781                         /* a missing [0] */
782                         n  = get_initializer_compound_n_entries(initializer);
783                         if (0 >= n)
784                                 return NULL;
785                         initializer = get_initializer_compound_value(initializer, 0);
786                 }
787
788                 switch (initializer->kind) {
789                 case IR_INITIALIZER_CONST:
790                         return get_initializer_const_value(initializer);
791                 case IR_INITIALIZER_TARVAL:
792                 case IR_INITIALIZER_NULL:
793                 default:
794                         return NULL;
795                 }
796         } else if (is_Sel(ptr)) {
797                 entry.ent = field = get_Sel_entity(ptr);
798                 tp = get_entity_owner(field);
799                 if (is_Array_type(tp)) {
800                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
801                         entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
802                 } else {
803                         int i, n_members = get_compound_n_members(tp);
804                         for (i = 0; i < n_members; ++i) {
805                                 if (get_compound_member(tp, i) == field)
806                                         break;
807                         }
808                         if (i >= n_members) {
809                                 /* not found: should NOT happen */
810                                 return NULL;
811                         }
812                         entry.index = i;
813                 }
814                 return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
815         }  else if (is_Add(ptr)) {
816                 ir_node  *l = get_Add_left(ptr);
817                 ir_node  *r = get_Add_right(ptr);
818                 ir_mode  *mode;
819                 unsigned pos;
820
821                 if (is_Const(r)) {
822                         ptr = l;
823                         tv  = get_Const_tarval(r);
824                 } else {
825                         ptr = r;
826                         tv  = get_Const_tarval(l);
827                 }
828 ptr_arith:
829                 mode = get_tarval_mode(tv);
830
831                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
832                 if (is_Sel(ptr)) {
833                         field = get_Sel_entity(ptr);
834                 } else {
835                         field = get_SymConst_entity(ptr);
836                 }
837
838                 /* count needed entries */
839                 pos = 0;
840                 for (ent = field;;) {
841                         tp = get_entity_type(ent);
842                         if (! is_Array_type(tp))
843                                 break;
844                         ent = get_array_element_entity(tp);
845                         ++pos;
846                 }
847                 /* should be at least ONE entry */
848                 if (pos == 0)
849                         return NULL;
850
851                 /* allocate the right number of entries */
852                 NEW_ARR_A(path_entry, p, pos);
853
854                 /* fill them up */
855                 pos = 0;
856                 for (ent = field;;) {
857                         unsigned size;
858                         tarval   *sz, *tv_index, *tlower, *tupper;
859                         long     index;
860                         ir_node  *bound;
861
862                         tp = get_entity_type(ent);
863                         if (! is_Array_type(tp))
864                                 break;
865                         ent = get_array_element_entity(tp);
866                         p[pos].ent  = ent;
867                         p[pos].next = &p[pos + 1];
868
869                         size = get_type_size_bytes(get_entity_type(ent));
870                         sz   = new_tarval_from_long(size, mode);
871
872                         tv_index = tarval_div(tv, sz);
873                         tv       = tarval_mod(tv, sz);
874
875                         if (tv_index == tarval_bad || tv == tarval_bad)
876                                 return NULL;
877
878                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
879                         bound  = get_array_lower_bound(tp, 0);
880                         tlower = computed_value(bound);
881                         bound  = get_array_upper_bound(tp, 0);
882                         tupper = computed_value(bound);
883
884                         if (tlower == tarval_bad || tupper == tarval_bad)
885                                 return NULL;
886
887                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
888                                 return NULL;
889                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
890                                 return NULL;
891
892                         /* ok, bounds check finished */
893                         index = get_tarval_long(tv_index);
894                         p[pos].index = index;
895                         ++pos;
896                 }
897                 if (! tarval_is_null(tv)) {
898                         /* hmm, wrong access */
899                         return NULL;
900                 }
901                 p[pos - 1].next = next;
902                 return rec_find_compound_ent_value(ptr, p);
903         } else if (is_Sub(ptr)) {
904                 ir_node *l = get_Sub_left(ptr);
905                 ir_node *r = get_Sub_right(ptr);
906
907                 ptr = l;
908                 tv  = get_Const_tarval(r);
909                 tv  = tarval_neg(tv);
910                 goto ptr_arith;
911         }
912         return NULL;
913 }  /* rec_find_compound_ent_value */
914
915 static ir_node *find_compound_ent_value(ir_node *ptr) {
916         return rec_find_compound_ent_value(ptr, NULL);
917 }  /* find_compound_ent_value */
918
919 /**
920  * Mark a Load memop to be replaced by a definition.
921  *
922  * @param op  the Load memop
923  */
924 static void mark_replace_load(memop_t *op, ir_node *def) {
925         op->replace = def;
926         op->flags |= FLAG_KILLED_NODE;
927         env.changed = 1;
928 }  /* mark_replace_load */
929
930 /**
931  * Mark a Store memop to be removed.
932  *
933  * @param op  the Store memop
934  */
935 static void mark_remove_store(memop_t *op) {
936         op->flags |= FLAG_KILLED_NODE;
937         env.changed = 1;
938 }  /* mark_remove_store */
939
940 /**
941  * Update a memop for a Load.
942  *
943  * @param m  the memop
944  */
945 static void update_Load_memop(memop_t *m) {
946         int       i;
947         ir_node   *load = m->node;
948         ir_node   *ptr;
949         ir_entity *ent;
950
951         if (get_Load_volatility(load) == volatility_is_volatile)
952                 m->flags |= FLAG_IGNORE;
953
954         ptr = get_Load_ptr(load);
955
956         m->value.address = ptr;
957
958         for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
959                 ir_node *proj = get_irn_out(load, i);
960                 long    pn;
961
962                 /* beware of keep edges */
963                 if (is_End(proj))
964                         continue;
965
966                 pn = get_Proj_proj(proj);
967                 m->projs[pn] = proj;
968                 switch (pn) {
969                 case pn_Load_res:
970                         m->value.value = proj;
971                         m->value.mode  = get_irn_mode(proj);
972                         break;
973                 case pn_Load_X_except:
974                         m->flags |= FLAG_EXCEPTION;
975                         break;
976                 case pn_Load_M:
977                         m->mem = proj;
978                         break;
979                 case pn_Load_X_regular:
980                         break;
981                 default:
982                         panic("Unsupported Proj from Load %+F", proj);
983                 }
984         }
985
986         /* check if we can determine the entity that will be loaded */
987         ent = find_constant_entity(ptr);
988
989         if (ent != NULL                                     &&
990                 allocation_static == get_entity_allocation(ent) &&
991                 visibility_external_allocated != get_entity_visibility(ent)) {
992                 /* a static allocation that is not external: there should be NO exception
993                  * when loading even if we cannot replace the load itself. */
994                 ir_node *value = NULL;
995
996                 /* no exception, clear the m fields as they might be checked again later */
997                 if (m->projs[pn_Load_X_except]) {
998                         exchange(m->projs[pn_Load_X_except], new_Bad());
999                         m->projs[pn_Load_X_except] = NULL;
1000                         m->flags &= ~FLAG_EXCEPTION;
1001                         env.changed = 1;
1002                 }
1003                 if (m->projs[pn_Load_X_regular]) {
1004                         exchange(m->projs[pn_Load_X_regular], new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1005                         m->projs[pn_Load_X_regular] = NULL;
1006                         env.changed = 1;
1007                 }
1008
1009                 if (variability_constant == get_entity_variability(ent)) {
1010                         if (is_atomic_entity(ent)) {
1011                                 /* Might not be atomic after lowering of Sels.  In this case we
1012                                  * could also load, but it's more complicated. */
1013                                 /* simpler case: we load the content of a constant value:
1014                                  * replace it by the constant itself */
1015                                 value = get_atomic_ent_value(ent);
1016                         } else if (ent->has_initializer) {
1017                                 /* new style initializer */
1018                                 value = find_compound_ent_value(ptr);
1019                         } else {
1020                                 /* old style initializer */
1021                                 compound_graph_path *path = get_accessed_path(ptr);
1022
1023                                 if (path != NULL) {
1024                                         assert(is_proper_compound_graph_path(path, get_compound_graph_path_length(path)-1));
1025
1026                                         value = get_compound_ent_value_by_path(ent, path);
1027                                         DB((dbg, LEVEL_1, "  Constant access at %F%F resulted in %+F\n", ent, path, value));
1028                                         free_compound_graph_path(path);
1029                                 }
1030                         }
1031                         if (value != NULL)
1032                                 value = can_replace_load_by_const(load, value);
1033                 }
1034
1035                 if (value != NULL) {
1036                         /* we completely replace the load by this value */
1037                         DB((dbg, LEVEL_1, "Replacing Load %+F by constant %+F\n", m->node, value));
1038                         mark_replace_load(m, value);
1039                         return;
1040                 }
1041         }
1042
1043         if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
1044                 /* only create an address if this node is NOT killed immediately or ignored */
1045                 m->value.id = register_address(ptr);
1046                 ++env.n_mem_ops;
1047         } else {
1048                 /* no user, KILL it */
1049                 mark_replace_load(m, NULL);
1050         }
1051 }
1052
1053 /**
1054  * Update a memop for a Store.
1055  *
1056  * @param m  the memop
1057  */
1058 static void update_Store_memop(memop_t *m) {
1059         int     i;
1060         ir_node *store = m->node;
1061         ir_node *adr   = get_Store_ptr(store);
1062
1063         if (get_Store_volatility(store) == volatility_is_volatile) {
1064                 m->flags |= FLAG_IGNORE;
1065         } else {
1066                 /* only create an address if this node is NOT ignored */
1067                 m->value.id = register_address(adr);
1068                 ++env.n_mem_ops;
1069         }
1070
1071         m->value.address = adr;
1072
1073         for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1074                 ir_node *proj = get_irn_out(store, i);
1075                 long    pn;
1076
1077                 /* beware of keep edges */
1078                 if (is_End(proj))
1079                         continue;
1080
1081                 pn = get_Proj_proj(proj);
1082                 m->projs[pn] = proj;
1083                 switch (pn) {
1084                 case pn_Store_X_except:
1085                         m->flags |= FLAG_EXCEPTION;
1086                         break;
1087                 case pn_Store_M:
1088                         m->mem = proj;
1089                         break;
1090                 case pn_Store_X_regular:
1091                         break;
1092                 default:
1093                         panic("Unsupported Proj from Store %+F", proj);
1094                 }
1095         }
1096         m->value.value = get_Store_value(store);
1097         m->value.mode  = get_irn_mode(m->value.value);
1098 }
1099
1100 /**
1101  * Update a memop for a Call.
1102  *
1103  * @param m  the memop
1104  */
1105 static void update_Call_memop(memop_t *m) {
1106         ir_node  *call = m->node;
1107         unsigned prop  = get_Call_memory_properties(call);
1108         int      i;
1109
1110         if (prop & mtp_property_const) {
1111                 /* A constant call does NOT use memory at all, we
1112                    can kick it from the list. */
1113         } else if (prop & mtp_property_pure) {
1114                 /* pure calls READ memory */
1115                 m->flags = 0;
1116         } else
1117                 m->flags = FLAG_KILL_ALL;
1118
1119         for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
1120                 ir_node *proj = get_irn_out(call, i);
1121
1122                 /* beware of keep edges */
1123                 if (is_End(proj))
1124                         continue;
1125
1126                 switch (get_Proj_proj(proj)) {
1127                 case pn_Call_X_except:
1128                         m->flags |= FLAG_EXCEPTION;
1129                         break;
1130                 case pn_Call_M_regular:
1131                         m->mem = proj;
1132                         break;
1133                 }
1134         }
1135 }
1136
1137 /**
1138  * Update a memop for a Div/Mod/Quot/DivMod.
1139  *
1140  * @param m  the memop
1141  */
1142 static void update_DivOp_memop(memop_t *m) {
1143         ir_node *div = m->node;
1144         int     i;
1145
1146         for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
1147                 ir_node *proj = get_irn_out(div, i);
1148
1149                 /* beware of keep edges */
1150                 if (is_End(proj))
1151                         continue;
1152
1153                 switch (get_Proj_proj(proj)) {
1154                 case pn_Generic_X_except:
1155                         m->flags |= FLAG_EXCEPTION;
1156                         break;
1157                 case pn_Generic_M_regular:
1158                         m->mem = proj;
1159                         break;
1160                 }
1161         }
1162 }
1163
1164 /**
1165  * Update a memop for a Phi.
1166  *
1167  * @param m  the memop
1168  */
1169 static void update_Phi_memop(memop_t *m) {
1170         /* the Phi is its own mem */
1171         m->mem = m->node;
1172 }
1173
1174 /**
1175  * Memory walker: collect all memory ops and build topological lists.
1176  */
1177 static void collect_memops(ir_node *irn, void *ctx) {
1178         memop_t  *op;
1179         ir_node  *block;
1180         block_t  *entry;
1181
1182         (void) ctx;
1183         if (is_Proj(irn)) {
1184                 /* we can safely ignore ProjM's except the initial memory */
1185                 if (irn != get_irg_initial_mem(current_ir_graph))
1186                         return;
1187         }
1188
1189         op    = alloc_memop(irn);
1190         block = get_nodes_block(irn);
1191         entry = get_block_entry(block);
1192
1193         if (is_Phi(irn)) {
1194                 update_Phi_memop(op);
1195                 /* Phis must always be placed first */
1196                 op->next = entry->memop_forward;
1197                 entry->memop_forward = op;
1198                 if (entry->memop_backward == NULL)
1199                         entry->memop_backward = op;
1200         } else {
1201                 switch (get_irn_opcode(irn)) {
1202                 case iro_Load:
1203                         update_Load_memop(op);
1204                         break;
1205                 case iro_Store:
1206                         update_Store_memop(op);
1207                         break;
1208                 case iro_Call:
1209                         update_Call_memop(op);
1210                         break;
1211                 case iro_Sync:
1212                 case iro_Pin:
1213                         op->mem = irn;
1214                         break;
1215                 case iro_Proj:
1216                         /* initial memory */
1217                         op->mem = irn;
1218                         break;
1219                 case iro_Return:
1220                 case iro_End:
1221                 /* we can use those to find the memory edge */
1222                         break;
1223                 case iro_Div:
1224                 case iro_DivMod:
1225                 case iro_Quot:
1226                 case iro_Mod:
1227                         update_DivOp_memop(op);
1228                         break;
1229
1230                 case iro_Builtin:
1231                         /* TODO: handle some builtins */
1232                 default:
1233                         /* unsupported operation */
1234                         op->flags = FLAG_KILL_ALL;
1235                 }
1236
1237
1238                 /* all others should be placed last */
1239                 if (entry->memop_backward == NULL) {
1240                         entry->memop_forward = entry->memop_backward = op;
1241                 } else {
1242                         entry->memop_backward->next = op;
1243                         entry->memop_backward       = op;
1244                 }
1245         }
1246 }
1247
1248 /**
1249  * Find an address in the current set.
1250  *
1251  * @param value  the value to be searched for
1252  */
1253 static memop_t *find_address(const value_t *value) {
1254         if (rbitset_is_set(env.curr_set, value->id)) {
1255                 memop_t *res = env.curr_id_2_memop[value->id];
1256
1257                 if (res->value.mode == value->mode)
1258                         return res;
1259                 /* allow hidden casts */
1260                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1261                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1262                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1263                         return res;
1264         }
1265         return NULL;
1266 }
1267
1268 /**
1269  * Find an address in the avail_out set.
1270  *
1271  * @param bl     the block
1272  * @param value  the value to be searched for
1273  */
1274 static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
1275         if (rbitset_is_set(bl->avail_out, value->id)) {
1276                 memop_t *res = bl->id_2_memop_avail[value->id];
1277
1278                 if (res->value.mode == value->mode)
1279                         return res;
1280                 /* allow hidden casts */
1281                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1282                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1283                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1284                         return res;
1285         }
1286         return NULL;
1287 }
1288
1289 /**
1290  * Kill all addresses from the current set.
1291  */
1292 static void kill_all(void) {
1293         rbitset_clear_all(env.curr_set, env.rbs_size);
1294
1295         /* set sentinel */
1296         rbitset_set(env.curr_set, env.rbs_size - 1);
1297 }
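
/*
 * Note on the sentinel bit set above (sketch): the last bit of every set
 * (index env.rbs_size - 1) is reserved and kept at 1, it is never used for
 * a real address id.  Iterations of the form
 *
 *     for (pos = rbitset_next(set, 0, 1); pos != env.rbs_size - 1;
 *          pos = rbitset_next(set, pos + 1, 1)) { ... }
 *
 * therefore always terminate at the sentinel without an extra length check,
 * see dump_curr() and kill_memops().
 */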
1298
1299 /**
1300  * Kill all memops from the current set whose addresses may alias the given Store value.
1301  *
1302  * @param value  the Store value
1303  */
1304 static void kill_memops(const value_t *value) {
1305         unsigned pos = 0;
1306         unsigned end = env.rbs_size - 1;
1307
1308         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1309                 memop_t *op = env.curr_id_2_memop[pos];
1310
1311                 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1312                                                           op->value.address, op->value.mode)) {
1313                         rbitset_clear(env.curr_set, pos);
1314                         env.curr_id_2_memop[pos] = NULL;
1315                         DB((dbg, LEVEL_2, "KILLING %+F because of possible alias address %+F\n", op->node, value->address));
1316                 }
1317         }
1318 }
1319
1320 /**
1321  * Add the value of a memop to the current set.
1322  *
1323  * @param op  the memory op
1324  */
1325 static void add_memop(memop_t *op) {
1326         rbitset_set(env.curr_set, op->value.id);
1327         env.curr_id_2_memop[op->value.id] = op;
1328 }
1329
1330 /**
1331  * Add the value of a memop to the avail_out set.
1332  *
1333  * @param bl  the block
1334  * @param op  the memory op
1335  */
1336 static void add_memop_avail(block_t *bl, memop_t *op) {
1337         rbitset_set(bl->avail_out, op->value.id);
1338         bl->id_2_memop_avail[op->value.id] = op;
1339 }
1340
1341 /**
1342  * Check if we can convert a value of one mode to another mode
1343  * without changing the representation of bits.
1344  */
1345 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
1346         if (get_mode_arithmetic(from) == irma_twos_complement &&
1347             get_mode_arithmetic(to) == irma_twos_complement &&
1348             get_mode_size_bits(from) == get_mode_size_bits(to))
1349                 return 1;
1350         return 0;
1351 }
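
/*
 * Example (sketch): mode_Is and mode_Iu are both 32-bit two's complement
 * modes, so can_convert_to(mode_Is, mode_Iu) returns 1 and conv_to() below
 * inserts a bit-preserving Conv.  Converting to mode_F or to a mode of a
 * different size is rejected because the bit representation would change.
 */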
1352
1353 /**
1354  * Add a Conv if needed.
1355  */
1356 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
1357         ir_mode *other = get_irn_mode(irn);
1358         if (other != mode) {
1359                 /* different modes: check if conversion is possible without changing the bits */
1360                 if (can_convert_to(other, mode)) {
1361                         ir_node *block = get_nodes_block(irn);
1362                         return new_r_Conv(current_ir_graph, block, irn, mode);
1363                 }
1364                 /* otherwise not possible ... yet */
1365                 return NULL;
1366         }
1367         return irn;
1368 }
1369
1370 /**
1371  * Update the address of a value if this address was a Load result
1372  * and that Load has now been killed.
1373  *
1374  * @param value  the value whose address is updated
1375  */
1376 static void update_address(value_t *value) {
1377         if (is_Proj(value->address)) {
1378                 ir_node *load = get_Proj_pred(value->address);
1379
1380                 if (is_Load(load)) {
1381                         const memop_t *op = get_irn_memop(load);
1382
1383                         if (op->flags & FLAG_KILLED_NODE)
1384                                 value->address = op->replace;
1385                 }
1386         }
1387 }
1388
1389 /**
1390  * Do forward dataflow analysis on the given block and calculate the
1391  * GEN and KILL in the current (avail) set.
1392  *
1393  * @param bl  the block
1394  */
1395 static void calc_gen_kill_avail(block_t *bl) {
1396         memop_t *op;
1397         ir_node *def;
1398
1399         for (op = bl->memop_forward; op != NULL; op = op->next) {
1400                 switch (get_irn_opcode(op->node)) {
1401                 case iro_Phi:
1402                         /* meet */
1403                         break;
1404                 case iro_Sync:
1405                         /* join */
1406                         break;
1407                 case iro_Load:
1408                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1409                                 /* do we have this already? */
1410                                 memop_t *other;
1411
1412                                 update_address(&op->value);
1413                                 other = find_address(&op->value);
1414                                 if (other != NULL && other != op) {
1415                                         def = conv_to(other->value.value, op->value.mode);
1416                                         if (def != NULL) {
1417 #ifdef DEBUG_libfirm
1418                                                 if (is_Store(other->node)) {
1419                                                         /* RAW */
1420                                                         DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
1421                                                 } else {
1422                                                         /* RAR */
1423                                                         DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
1424                                                 }
1425 #endif
1426                                                 mark_replace_load(op, def);
1427                                                 /* do NOT change the memop table */
1428                                                 continue;
1429                                         }
1430                                 }
1431                                 /* add this value */
1432                                 add_memop(op);
1433                         }
1434                         break;
1435                 case iro_Store:
1436                         if (! (op->flags & FLAG_KILLED_NODE)) {
1437                                 /* do we have this store already */
1438                                 memop_t *other;
1439
1440                                 update_address(&op->value);
1441                                 other = find_address(&op->value);
1442                                 if (other != NULL) {
1443                                         if (is_Store(other->node)) {
1444                                                 if (op != other && !(other->flags & FLAG_IGNORE) &&
1445                                                     get_nodes_block(other->node) == get_nodes_block(op->node)) {
1446                                                         /*
1447                                                          * For a WAW in the same block we can kick the first Store.
1448                                                          * This is a shortcut: we know that the second Store will be
1449                                                          * anticipated in any case.
1450                                                          */
1451                                                         DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", other->node, op->node));
1452                                                         mark_remove_store(other);
1453                                                         /* FIXME: a Load might get freed due to this killed Store */
1454                                                 }
1455                                         } else if (other->value.value == op->value.value && !(op->flags & FLAG_IGNORE)) {
1456                                                 /* WAR */
1457                                                 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1458                                                 mark_remove_store(op);
1459                                                 /* do NOT change the memop table */
1460                                                 continue;
1461                                         }
1462                                 }
1463                                 /* KILL all possible aliases */
1464                                 kill_memops(&op->value);
1465                                 /* add this value */
1466                                 add_memop(op);
1467                         }
1468                         break;
1469                 default:
1470                         if (op->flags & FLAG_KILL_ALL)
1471                                 kill_all();
1472                 }
1473         }
1474 }
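
/*
 * Illustration, added for exposition: the dependence cases handled above,
 * written as C-like source.
 *
 *   x = *p; y = *p;    RAR - the second Load is replaced by x
 *   *p = a; x = *p;    RAW - the Load is replaced by a
 *   *p = a; *p = b;    WAW (same block) - the first Store is removed
 *   x = *p; *p = x;    the "WAR" case above - the Store writes the value
 *                      already known to live at p and is removed
 */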
1475
1476 #define BYTE_SIZE(x)  (((x) + 7) >> 3)
1477
1478 /**
1479  * Do forward dataflow analysis on a given block to calculate the avail_out set
1480  * for this block only.
1481  *
1482  * @param bl  the block
1483  */
1484 static void forward_avail(block_t *bl) {
1485         /* fill the data from the current block */
1486         env.curr_id_2_memop = bl->id_2_memop_avail;
1487         env.curr_set        = bl->avail_out;
1488
1489         calc_gen_kill_avail(bl);
1490         dump_curr(bl, "Avail_out");
1491 }
1492
1493 /**
1494  * Do backward dataflow analysis on a given block to calculate the antic set
1495  * of Loaded addresses.
1496  *
1497  * @param bl  the block
1498  *
1499  * @return non-zero if the set has changed since last iteration
1500  */
1501 static int backward_antic(block_t *bl) {
1502         memop_t *op;
1503         int     n = get_Block_n_cfg_outs(bl->block);
1504
1505         if (n >= 1) {
1506                 ir_node *succ    = get_Block_cfg_out(bl->block, 0);
1507                 block_t *succ_bl = get_block_entry(succ);
1508                 int i;
1509
1510                 rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1511                 memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1512
1513                 for (i = n - 1; i > 0; --i) {
1514                         ir_node *succ    = get_Block_cfg_out(bl->block, i);
1515                         block_t *succ_bl = get_block_entry(succ);
1516
1517                         rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1518                 }
1519         } else {
1520                 /* block ends with a noreturn call */
1521                 kill_all();
1522         }
1523
1524         dump_curr(bl, "AnticL_out");
1525
1526         for (op = bl->memop_backward; op != NULL; op = op->prev) {
1527                 switch (get_irn_opcode(op->node)) {
1528                 case iro_Phi:
1529                         /* meet */
1530                         break;
1531                 case iro_Sync:
1532                         /* join */
1533                         break;
1534                 case iro_Load:
1535                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1536                                 /* always add it */
1537                                 add_memop(op);
1538                         }
1539                         break;
1540                 case iro_Store:
1541                         if (! (op->flags & FLAG_KILLED_NODE)) {
1542                                 /* a Store: check which memops must be killed */
1543                                 kill_memops(&op->value);
1544                         }
1545                         break;
1546                 default:
1547                         if (op->flags & FLAG_KILL_ALL)
1548                                 kill_all();
1549                 }
1550         }
1551
1552         memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1553         if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1554                 /* changed */
1555                 rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
1556                 dump_curr(bl, "AnticL_in*");
1557                 return 1;
1558         }
1559         dump_curr(bl, "AnticL_in");
1560         return 0;
1561 }
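
/*
 * Sketch of the dataflow equations the function above iterates, added for
 * exposition (set operations on the address bitsets):
 *
 *   AnticL_out(B) = intersection of AnticL_in(S) over all CFG successors S
 *                   (empty if B has no successor)
 *   AnticL_in(B)  = result of applying the memops of B backwards to
 *                   AnticL_out(B): Loads generate their address, Stores and
 *                   kill-all memops remove the aliased/killed addresses
 */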
1562
1563 /**
1564  * Replace a Load memop by an already known value.
1565  *
1566  * @param op  the Load memop
1567  */
1568 static void replace_load(memop_t *op) {
1569         ir_node *load = op->node;
1570         ir_node *def  = skip_Id(op->replace);
1571         ir_node *proj;
1572         ir_mode *mode;
1573
1574         if (def != NULL)
1575                 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1576         else {
1577                 if (op->flags & FLAG_EXCEPTION) {
1578                         /* bad: this node is unused and executed for exception only */
1579                         DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1580                         return;
1581                 }
1582                 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1583         }
1584
1585         if (op->mem != NULL) {
1586                 /* in rare cases a Load might have NO memory */
1587                 exchange(op->mem, get_Load_mem(load));
1588         }
1589         proj = op->projs[pn_Load_res];
1590         if (proj != NULL) {
1591                 mode = get_irn_mode(proj);
1592                 if (get_irn_mode(def) != mode) {
1593                         /* a hidden cast */
1594                         dbg_info *db    = get_irn_dbg_info(load);
1595                         ir_node  *block = get_nodes_block(proj);
1596                         def = new_rd_Conv(db, current_ir_graph, block, def, mode);
1597                 }
1598                 exchange(proj, def);
1599         }
1600         proj = op->projs[pn_Load_X_except];
1601         if (proj != NULL) {
1602                 exchange(proj, new_Bad());
1603         }
1604         proj = op->projs[pn_Load_X_regular];
1605         if (proj != NULL) {
1606                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1607         }
1608 }
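
/*
 * Summary of the rewiring above, added as a descriptive note: for a killed
 * Load all its Projs are detached -
 *   Proj_M         -> the Load's former memory input
 *   Proj_res       -> the replacement value (behind a Conv if the mode differs)
 *   Proj_X_except  -> Bad (the replaced Load cannot raise an exception)
 *   Proj_X_regular -> a plain Jmp in the Load's block
 */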
1609
1610 /**
1611  * Remove a Store memop.
1612  *
1613  * @param op  the Store memop
1614  */
1615 static void remove_store(memop_t *op) {
1616         ir_node *store = op->node;
1617         ir_node *proj;
1618
1619         DB((dbg, LEVEL_1, "Removing %+F\n", store));
1620
1621         if (op->mem != NULL) {
1622                 /* in rare cases a Store might have no memory */
1623                 exchange(op->mem, get_Store_mem(store));
1624         }
1625         proj = op->projs[pn_Store_X_except];
1626         if (proj != NULL) {
1627                 exchange(proj, new_Bad());
1628         }
1629         proj = op->projs[pn_Store_X_regular];
1630         if (proj != NULL) {
1631                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
1632         }
1633 }
1634
1635
1636 /**
1637  * Do all necessary replacements for a given block.
1638  *
1639  * @param bl  the block
1640  */
1641 static void do_replacements(block_t *bl) {
1642         memop_t *op;
1643
1644         for (op = bl->memop_forward; op != NULL; op = op->next) {
1645                 if (op->flags & FLAG_KILLED_NODE) {
1646                         switch (get_irn_opcode(op->node)) {
1647                         case iro_Load:
1648                                 replace_load(op);
1649                                 break;
1650                         case iro_Store:
1651                                 remove_store(op);
1652                                 break;
1653                         }
1654                 }
1655         }
1656 }
1657
1658 /**
1659  * Calculate the Avail_out sets for all basic blocks.
1660  */
1661 static void calcAvail(void) {
1662         memop_t  **tmp_memop = env.curr_id_2_memop;
1663         unsigned *tmp_set    = env.curr_set;
1664         block_t  *bl;
1665
1666         /* calculate avail_out */
1667         DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1668
1669         /* iterate over all blocks in any order, skip the start block */
1670         for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1671                 forward_avail(bl);
1672         }
1673
1674         /* restore the current sets */
1675         env.curr_id_2_memop = tmp_memop;
1676         env.curr_set        = tmp_set;
1677 }
1678
1679 /**
1680  * Calculate the Antic_in sets for all basic blocks.
1681  */
1682 static void calcAntic(void) {
1683         int i, need_iter;
1684
1685         /* calculate antic_in */
1686         DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1687         i = 0;
1688         do {
1689                 block_t *bl;
1690
1691                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1692
1693                 need_iter = 0;
1694
1695                 /* over all blocks in reverse post order */
1696                 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1697                         need_iter |= backward_antic(bl);
1698                 }
1699                 ++i;
1700         } while (need_iter);
1701         DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1702 }
1703
1704 /**
1705  * Return the node representing the last memory in a block.
1706  *
1707  * @param bl  the block
1708  */
1709 static ir_node *find_last_memory(block_t *bl) {
1710         for (;;) {
1711                 if (bl->memop_backward != NULL) {
1712                         return bl->memop_backward->mem;
1713                 }
1714                 /* if there is NO memory in this block, go to the dominator */
1715                 bl = get_block_entry(get_Block_idom(bl->block));
1716         }
1717 }
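
/*
 * Added note: if the block itself contains no memory operation, the walk
 * follows the immediate dominators upwards until a block with a memop is
 * found, so the "last memory" is the memory state that reaches bl from
 * above.
 */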
1718
1719 /**
1720  * Reroute all memory users of old memory
1721  * to a new memory IR-node.
1722  *
1723  * @param omem  the old memory IR-node
1724  * @param nmem  the new memory IR-node
1725  */
1726 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1727         int i;
1728
1729         for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1730                 int     n_pos;
1731                 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1732
1733                 set_irn_n(user, n_pos, nmem);
1734         }
1735
1736         /* all edges that previously pointed to omem now point to nmem */
1737         nmem->out = omem->out;
1738 }
1739
1740 /**
1741  * Reroute memory users of old memory that are dominated by a given block
1742  * to a new memory IR-node.
1743  *
1744  * @param omem     the old memory IR-node
1745  * @param nmem     the new memory IR-node
1746  * @param pass_bl  the block the memory must pass
1747  */
1748 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1749         int             i, j, n = get_irn_n_outs(omem);
1750         ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1751
1752         for (i = j = 0; i < n; ++i) {
1753                 int     n_pos;
1754                 ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
1755                 ir_node *use_bl = get_nodes_block(user);
1756
1757
1758                 if (is_Phi(user)) {
1759                         use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1760                 }
1761                 if (block_dominates(pass_bl, use_bl)) {
1762                         /* found a user that is dominated */
1763                         ++j;
1764                         edges[j].pos = n_pos;
1765                         edges[j].use = user;
1766
1767                         set_irn_n(user, n_pos, nmem);
1768                 }
1769         }
1770
1771         /* Modify the out structure: we create a new out edge array on our
1772            temporary obstack here. This should be no problem, as we invalidate the edges
1773            at the end anyway. */
1774         /* first entry is used for the length */
1775         edges[0].pos = j;
1776         nmem->out = edges;
1777 }
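
/*
 * Added note on the layout used above: like libfirm's regular out arrays,
 * edges[0].pos holds the number of entries and the real def-use edges are
 * stored at indices 1..j.
 */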
1778
1779 /**
1780  * Translate an address through a Phi node into a given predecessor
1781  * block.
1782  *
1783  * @param address  the address
1784  * @param block    the block
1785  * @param pos      the position of the predecessor in block
1786  */
1787 static ir_node *phi_translate(ir_node *address, ir_node *block, int pos) {
1788         if (is_Phi(address) && get_nodes_block(address) == block)
1789                 address = get_Phi_pred(address, pos);
1790         return address;
1791 }
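
/*
 * Example, added for clarity: for
 *
 *   B: adr = Phi(adr0, adr1)
 *      x   = Load(adr)
 *
 * phi_translate(adr, B, 1) yields adr1, i.e. the address as it is computed
 * on the path entering B through its second predecessor.
 */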
1792
1793 /**
1794  * Get the effective block of an address in the pos'th predecessor
1795  * of the given block.
1796  *
1797  * @param address  the address
1798  * @param block    the block
1799  * @param pos      the position of the predecessor in block
1800  */
1801 static ir_node *get_effective_block(ir_node *address, ir_node *block, int pos) {
1802         address = phi_translate(address, block, pos);
1803         return get_nodes_block(address);
1804 }
1805
1806 /**
1807  * Insert Loads, making partly redundant Loads fully redundant.
1808  */
1809 static int insert_Load(block_t *bl) {
1810         ir_node  *block = bl->block;
1811         int      i, n = get_Block_n_cfgpreds(block);
1812         unsigned pos = 0;
1813         unsigned end = env.rbs_size - 1;
1814
1815         DB((dbg, LEVEL_3, "processing %+F\n", block));
1816
1817         if (n == 0) {
1818                 /* might still happen for an unreachable block (end for instance) */
1819                 return 0;
1820         }
1821
1822         if (n > 1) {
1823                 int     i, pos;
1824                 ir_node **ins;
1825
1826                 NEW_ARR_A(ir_node *, ins, n);
1827
1828                 rbitset_set_all(env.curr_set, env.rbs_size);
1829
1830                 /* More than one predecessor: calculate the join over all avail_out sets, ignoring unevaluated
1831                    blocks. These are Top anyway. */
1832                 for (i = n - 1; i >= 0; --i) {
1833                         ir_node *pred = skip_Proj(get_Block_cfgpred(block, i));
1834                         ir_node *blk  = get_nodes_block(pred);
1835                         block_t *pred_bl;
1836
1837                         pred_bl = get_block_entry(blk);
1838                         rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
1839
1840                         if (is_Load(pred) || is_Store(pred)) {
1841                                 /* We reached this block by an exception from a Load or Store:
1842                                  * the memop causing the exception was NOT completed, so kill it.
1843                                  */
1844                                 memop_t *exc_op = get_irn_memop(pred);
1845                                 rbitset_clear(env.curr_set, exc_op->value.id);
1846                         }
1847
1848                 }
1849                 /*
1850                  * Ensure that all values are in the map: build Phi's if necessary.
1851                  * Note: the last bit is the sentinel and ALWAYS set, so we start at env.rbs_size - 2.
1852                  */
1853                 for (pos = env.rbs_size - 2; pos >= 0; --pos) {
1854                         if (! rbitset_is_set(env.curr_set, pos))
1855                                 env.curr_id_2_memop[pos] = NULL;
1856                         else {
1857                                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1858                                 block_t *pred_bl = get_block_entry(pred);
1859                                 int     need_phi = 0;
1860                                 memop_t *first   = NULL;
1861                                 ir_mode *mode;
1862
1863                                 for (i = 0; i < n; ++i) {
1864                                         memop_t *mop;
1865
1866                                         pred    = get_Block_cfgpred_block(bl->block, i);
1867                                         pred_bl = get_block_entry(pred);
1868
1869                                         mop = pred_bl->id_2_memop_avail[pos];
1870                                         if (first == NULL) {
1871                                                 first = mop;
1872                                                 ins[0] = first->value.value;
1873                                                 mode = get_irn_mode(ins[0]);
1874
1875                                                 /* no Phi needed so far */
1876                                                 env.curr_id_2_memop[pos] = first;
1877                                         } else {
1878                                                 ins[i] = conv_to(mop->value.value, mode);
1879                                                 if (ins[i] != ins[0]) {
1880                                                         if (ins[i] == NULL) {
1881                                                                 /* conversion failed */
1882                                                                 env.curr_id_2_memop[pos] = NULL;
1883                                                                 rbitset_clear(env.curr_set, pos);
1884                                                                 break;
1885                                                         }
1886                                                         need_phi = 1;
1887                                                 }
1888                                         }
1889                                 }
1890                                 if (need_phi) {
1891                                         /* build a Phi  */
1892                                         ir_node *phi = new_r_Phi(current_ir_graph, bl->block, n, ins, mode);
1893                                         memop_t *phiop = alloc_memop(phi);
1894
1895                                         phiop->value = first->value;
1896                                         phiop->value.value = phi;
1897
1898                                         /* no need to link it in, as it is a DATA phi */
1899
1900                                         env.curr_id_2_memop[pos] = phiop;
1901
1902                                         DB((dbg, LEVEL_3, "Created new %+F on merging value for address %+F\n", phi, first->value.address));
1903                                 }
1904                         }
1905                 }
1906         } else {
1907                 /* only one predecessor, simply copy the map */
1908                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1909                 block_t *pred_bl = get_block_entry(pred);
1910
1911                 rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
1912
1913                 memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
1914         }
1915
1916         if (n > 1) {
1917                 /* check for partly redundant values */
1918                 for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
1919                         memop_t *op = bl->id_2_memop_antic[pos];
1920                         int     have_some, all_same;
1921                         ir_node *first;
1922
1923                         if (rbitset_is_set(env.curr_set, pos)) {
1924                                 /* already avail */
1925                                 continue;
1926                         }
1927
1928                         assert(is_Load(op->node));
1929
1930                         DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
1931
1932                         have_some  = 0;
1933                         all_same   = 1;
1934                         first      = NULL;
1935                         for (i = n - 1; i >= 0; --i) {
1936                                 ir_node *pred    = get_Block_cfgpred_block(block, i);
1937                                 block_t *pred_bl = get_block_entry(pred);
1938                                 memop_t *e       = find_address_avail(pred_bl, &op->value);
1939                                 ir_mode *mode    = op->value.mode;
1940
1941                                 if (e == NULL) {
1942                                         ir_node *ef_block = get_effective_block(op->value.address, block, i);
1943                                         if (! block_dominates(ef_block, pred)) {
1944                                                 /* cannot place a copy here */
1945                                                 have_some = 0;
1946                                                 DB((dbg, LEVEL_3, "%+F cannot be moved into predecessor %+F\n", op->node, pred));
1947                                                 break;
1948                                         }
1949                                         DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
1950                                         pred_bl->avail = NULL;
1951                                         all_same       = 0;
1952                                 } else {
1953                                         if (e->value.mode != mode && !can_convert_to(e->value.mode, mode)) {
1954                                                 /* cannot create a Phi due to different modes */
1955                                                 have_some = 0;
1956                                                 break;
1957                                         }
1958                                         pred_bl->avail = e;
1959                                         have_some      = 1;
1960                                         DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
1961                                         if (first == NULL)
1962                                                 first = e->node;
1963                                         else if (first != e->node)
1964                                                 all_same = 0;
1965                                 }
1966                         }
1967                         if (have_some && !all_same) {
1968                                 ir_mode *mode = op->value.mode;
1969                                 ir_node **in, *phi;
1970                                 memop_t *phi_op;
1971
1972                                 NEW_ARR_A(ir_node *, in, n);
1973
1974                                 for (i = n - 1; i >= 0; --i) {
1975                                         ir_node *pred    = get_Block_cfgpred_block(block, i);
1976                                         block_t *pred_bl = get_block_entry(pred);
1977
1978                                         if (pred_bl->avail == NULL) {
1979                                                 /* create a new Load here to make the original one fully redundant */
1980                                                 dbg_info *db       = get_irn_dbg_info(op->node);
1981                                                 ir_node  *last_mem = find_last_memory(pred_bl);
1982                                                 ir_node  *load, *def, *adr;
1983                                                 memop_t  *new_op;
1984
1985                                                 assert(last_mem != NULL);
1986                                                 adr  = phi_translate(op->value.address, block, i);
1987                                                 load = new_rd_Load(db, current_ir_graph, pred, last_mem, adr, mode, cons_none);
1988                                                 def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
1989                                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));
1990
1991                                                 new_op                = alloc_memop(load);
1992                                                 new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
1993                                                 new_op->value.address = adr;
1994                                                 new_op->value.id      = op->value.id;
1995                                                 new_op->value.mode    = mode;
1996                                                 new_op->value.value   = def;
1997
1998                                                 new_op->projs[pn_Load_M]   = new_op->mem;
1999                                                 new_op->projs[pn_Load_res] = def;
2000
2001                                                 new_op->prev = pred_bl->memop_backward;
2002                                                 if (pred_bl->memop_backward != NULL)
2003                                                         pred_bl->memop_backward->next = new_op;
2004
2005                                                 pred_bl->memop_backward = new_op;
2006
2007                                                 if (pred_bl->memop_forward == NULL)
2008                                                         pred_bl->memop_forward = new_op;
2009
2010                                                 if (get_nodes_block(last_mem) == pred) {
2011                                                         /* We have added a new last memory op in the pred block.
2012                                                            If pred already had a last memory, reroute all its memory
2013                                                            users. */
2014                                                         reroute_all_mem_users(last_mem, new_op->mem);
2015                                                 } else {
2016                                                         /* reroute only those memory users whose memory passes through the pred block */
2017                                                         reroute_mem_through(last_mem, new_op->mem, pred);
2018                                                 }
2019
2020                                                 /* we added this load at the end, so it will be avail anyway */
2021                                                 add_memop_avail(pred_bl, new_op);
2022                                                 pred_bl->avail = new_op;
2023                                         }
2024                                         in[i] = conv_to(pred_bl->avail->value.value, mode);
2025                                 }
2026                                 phi = new_r_Phi(current_ir_graph, block, n, in, mode);
2027                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));
2028
2029                                 phi_op = clone_memop_phi(op, phi);
2030                                 add_memop(phi_op);
2031                         }
2032                 }
2033         }
2034
2035         /* recalculate avail by gen and kill */
2036         calc_gen_kill_avail(bl);
2037
2038         /* always update the map after gen/kill, as values might have been changed due to RAR/WAR/WAW */
2039         memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2040
2041         if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
2042                 /* the avail set has changed */
2043                 rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
2044                 dump_curr(bl, "Avail_out*");
2045                 return 1;
2046         }
2047         dump_curr(bl, "Avail_out");
2048         return 0;
2049 }
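
/*
 * Illustration of the overall transformation, added for exposition:
 *
 *   if (c)                         if (c)
 *       x = *p;                        x = *p;
 *   else                           else
 *       other();         ==>           other(), t = *p;    inserted Load
 *   y = *p;                        y = phi(x, t);          old Load now redundant
 *
 * Before the transformation the Load after the join is only partly
 * redundant; inserting a copy into the predecessor that lacks it makes it
 * fully redundant, so do_replacements() can later remove it.
 */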
2050
2051 /**
2052  * Insert Loads upwards.
2053  */
2054 static void insert_Loads_upwards(void) {
2055         int i, need_iter;
2056         block_t *bl;
2057
2058         /* recalculate avail_out and insert Loads */
2059         DB((dbg, LEVEL_2, "Inserting Loads\n"));
2060
2061         i = 0;
2062         do {
2063                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
2064
2065                 need_iter = 0;
2066
2067                 /* over all blocks in reverse post order, skip the start block */
2068                 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
2069                         need_iter |= insert_Load(bl);
2070                 }
2071                 ++i;
2072         } while (need_iter);
2073
2074         DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
2075 }
2076
2077 /**
2078  * Kill unreachable control flow.
2079  */
2080 static void kill_unreachable_blocks(ir_graph *irg) {
2081         block_t *bl;
2082         ir_node **ins;
2083         int     changed = 0;
2084
2085         NEW_ARR_A(ir_node *, ins, env.max_cfg_preds);
2086
2087         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2088                 ir_node *block = bl->block;
2089                 int     i, j, k, n;
2090
2091                 assert(get_Block_mark(block));
2092
2093                 n = get_Block_n_cfgpreds(block);
2094
2095                 for (i = j = 0; i < n; ++i) {
2096                         ir_node *pred = get_Block_cfgpred(block, i);
2097                         ir_node *pred_bl;
2098
2099                         if (is_Bad(pred))
2100                                 continue;
2101
2102                         pred_bl = get_nodes_block(skip_Proj(pred));
2103                         if (! get_Block_mark(pred_bl))
2104                                 continue;
2105
2106                         ins[j++] = pred;
2107                 }
2108                 if (j != n) {
2109                         ir_node *phi, *next;
2110
2111                         /* some unreachable blocks detected */
2112                         changed = 1;
2113
2114                         DB((dbg, LEVEL_1, "Killing dead block predecessors on %+F\n", block));
2115
2116                         set_irn_in(block, j, ins);
2117
2118                         /* shorten all Phi nodes */
2119                         for (phi = get_Block_phis(block); phi != NULL; phi = next) {
2120                                 next = get_Phi_next(phi);
2121
2122                                 for (i = k = 0; i < n; ++i) {
2123                                         ir_node *pred = get_Block_cfgpred_block(block, i);
2124
2125                                         if (is_Bad(pred))
2126                                                 continue;
2127
2128                                         if (! get_Block_mark(pred))
2129                                                 continue;
2130
2131                                         ins[k++] = get_Phi_pred(phi, i);
2132                                 }
2133                                 if (k == 1)
2134                                         exchange(phi, ins[0]);
2135                                 else
2136                                         set_irn_in(phi, k, ins);
2137                         }
2138                 }
2139
2140         }
2141
2142         if (changed) {
2143                 /* kick keep alives */
2144                 ir_node *end = get_irg_end(irg);
2145                 int     i, j, n = get_End_n_keepalives(end);
2146
2147                 NEW_ARR_A(ir_node *, ins, n);
2148
2149                 for (i = j = 0; i < n; ++i) {
2150                         ir_node *ka = get_End_keepalive(end, i);
2151                         ir_node *ka_bl;
2152
2153                         if (is_Bad(ka))
2154                                 continue;
2155                         if (is_Block(ka))
2156                                 ka_bl = ka;
2157                         else
2158                                 ka_bl = get_nodes_block(skip_Proj(ka));
2159                         if (get_Block_mark(ka_bl))
2160                                 ins[j++] = ka;
2161                 }
2162                 if (j != n)
2163                         set_End_keepalives(end, j, ins);
2164
2165                 free_irg_outs(irg);
2166
2167                 /* this transformation does NOT invalidate the dominance information */
2168         }
2169 }
2170
2171 int opt_ldst(ir_graph *irg) {
2172         block_t  *bl;
2173         ir_graph *rem = current_ir_graph;
2174
2175         current_ir_graph = irg;
2176
2177         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
2178 //      firm_dbg_set_mask(dbg, -1);
2179
2180         DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
2181
2182         /* we need landing pads */
2183         remove_critical_cf_edges(irg);
2184
2185 //      dump_ir_block_graph(irg, "-XXX");
2186
2187         if (get_opt_alias_analysis()) {
2188                 assure_irg_entity_usage_computed(irg);
2189                 assure_irp_globals_entity_usage_computed();
2190         }
2191
2192         obstack_init(&env.obst);
2193         ir_nodemap_init(&env.adr_map);
2194
2195         env.forward       = NULL;
2196         env.backward      = NULL;
2197         env.curr_adr_id   = 0;
2198         env.n_mem_ops     = 0;
2199         env.max_cfg_preds = 0;
2200         env.changed       = 0;
2201         env.start_bl      = get_irg_start_block(irg);
2202         env.end_bl        = get_irg_end_block(irg);
2203 #ifdef DEBUG_libfirm
2204         env.id_2_address  = NEW_ARR_F(ir_node *, 0);
2205 #endif
2206
2207         assure_irg_outs(irg);
2208
2209         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2210
2211         /* first step: allocate block entries. Note that some blocks might be
2212            unreachable here. Using the normal walk ensures that ALL blocks are initialized. */
2213         irg_walk_graph(irg, prepare_blocks, link_phis, NULL);
2214
2215         /* produce an inverse post-order list for the CFG: this links only reachable
2216            blocks */
2217         irg_out_block_walk(get_irg_start_block(irg), NULL, inverse_post_order, NULL);
2218
2219         if (! get_Block_mark(env.end_bl)) {
2220                 /*
2221                  * The end block is NOT reachable due to endless loops
2222                  * or no_return calls.
2223                  * Place the end block last.
2224                  * env.backward points to the last block in the list for this purpose.
2225                  */
2226                 env.backward->forward_next = get_block_entry(env.end_bl);
2227
2228                 set_Block_mark(env.end_bl, 1);
2229         }
2230
2231         /* KILL unreachable blocks: these disturb the data flow analysis */
2232         kill_unreachable_blocks(irg);
2233
2234         assure_doms(irg);
2235
2236         /* second step: find and sort all memory ops */
2237         walk_memory_irg(irg, collect_memops, NULL, NULL);
2238
2239 #ifdef DEBUG_libfirm
2240         /* check that the backward map is correct */
2241         assert((unsigned)ARR_LEN(env.id_2_address) == env.curr_adr_id);
2242 #endif
2243
2244         if (env.n_mem_ops == 0) {
2245                 /* no memory ops */
2246                 goto end;
2247         }
2248
2249         /* create the backward links. */
2250         env.backward = NULL;
2251         irg_block_walk_graph(irg, NULL, collect_backward, NULL);
2252
2253         /* link the end block in */
2254         bl = get_block_entry(env.end_bl);
2255         bl->backward_next = env.backward;
2256         env.backward      = bl;
2257
2258         /* check that we really start with the start / end block */
2259         assert(env.forward->block  == env.start_bl);
2260         assert(env.backward->block == env.end_bl);
2261
2262         /* create address sets: for now, only the existing addresses are allowed plus one
2263            needed for the sentinel */
2264         env.rbs_size = env.n_mem_ops + 1;
2265
2266         /* create the current set */
2267         env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2268         rbitset_set(env.curr_set, env.rbs_size - 1);
2269         env.curr_id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2270         memset(env.curr_id_2_memop, 0, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2271
2272         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2273                 /* set sentinel bits */
2274                 bl->avail_out  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2275                 rbitset_set(bl->avail_out, env.rbs_size - 1);
2276
2277                 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2278                 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
2279
2280                 bl->anticL_in  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2281                 rbitset_set(bl->anticL_in, env.rbs_size - 1);
2282
2283                 bl->id_2_memop_antic = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2284                 memset(bl->id_2_memop_antic, 0, env.rbs_size * sizeof(bl->id_2_memop_antic[0]));
2285         }
2286
2287 //      dump_block_list(&env);
2288
2289         calcAvail();
2290         calcAntic();
2291
2292         insert_Loads_upwards();
2293
2294         if (env.changed) {
2295                 /* over all blocks in reverse post order */
2296                 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2297                         do_replacements(bl);
2298                 }
2299
2300                 /* not only invalidate but free them: we might have allocated new out arrays
2301                    on our obstack, which will be deleted anyway. */
2302                 free_irg_outs(irg);
2303                 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
2304         }
2305 end:
2306
2307         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2308         ir_nodemap_destroy(&env.adr_map);
2309         obstack_free(&env.obst, NULL);
2310
2311 //      dump_ir_block_graph(irg, "-YYY");
2312
2313 #ifdef DEBUG_libfirm
2314         DEL_ARR_F(env.id_2_address);
2315 #endif
2316
2317         current_ir_graph = rem;
2318         return env.changed != 0;
2319 }
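
/*
 * Minimal usage sketch, added for exposition and never compiled: how a
 * driver might run this pass over all graphs of the program. It assumes the
 * usual irprog iteration API; opt_ldst() returns non-zero iff the graph was
 * changed.
 */
#if 0
static void run_opt_ldst_on_all_graphs(void)
{
        int i;
        for (i = get_irp_n_irgs() - 1; i >= 0; --i) {
                ir_graph *irg = get_irp_irg(i);

                if (opt_ldst(irg)) {
                        /* the pass already freed the out edges and reset the
                           entity usage state; further cleanup could run here */
                }
        }
}
#endif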