use environment also in non-debug mode.
[libfirm] / ir / opt / opt_ldst.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Dataflow-driven Load/Store optimizations, using some ideas from
23  *          VanDrunen's LEPRE
24  * @author  Michael Beck
25  * @version $Id$
26  */
27 #include "config.h"
28
29 #include "irnode_t.h"
30 #include "irflag_t.h"
31 #include "array_t.h"
32 #include "ircons.h"
33 #include "irdom.h"
34 #include "irgmod.h"
35 #include "irgwalk.h"
36 #include "irouts.h"
37 #include "irgraph.h"
38 #include "irgopt.h"
39 #include "iropt.h"
40 #include "iroptimize.h"
41 #include "irnodemap.h"
42 #include "raw_bitset.h"
43 #include "debug.h"
44 #include "error.h"
45
46 /* maximum number of output Proj's */
47 #define MAX_PROJ (pn_Load_max > pn_Store_max ? pn_Load_max : pn_Store_max)
48
49 /**
50  * Mapping an address to an dense ID.
51  */
52 typedef struct address_entry_t {
53         unsigned id;          /**< The ID */
54 } address_entry;
55
56 /**
57  * Memop-flags.
58  */
59 enum memop_flags {
60         FLAG_KILL_ALL    = 1, /**< KILL all addresses */
61         FLAG_KILLED_NODE = 2, /**< this node was killed */
62         FLAG_EXCEPTION   = 4, /**< this node has exception flow */
63         FLAG_IGNORE      = 8, /**< ignore this node (volatile or other) */
64 };
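
/*
 * Illustrative sketch (not part of the original file): the collection phase
 * below combines these flags roughly as follows.  A volatile Load/Store gets
 * FLAG_IGNORE, an operation with an exception Proj gets FLAG_EXCEPTION, and
 * an operation whose memory effect is unknown kills the whole address set:
 *
 *   if (get_Load_volatility(load) == volatility_is_volatile)
 *           flags |= FLAG_IGNORE;      // never used for value numbering
 *   if (has_exception_proj)            // hypothetical predicate
 *           flags |= FLAG_EXCEPTION;   // has exception flow
 *   if (memory_effect_unknown)         // hypothetical predicate
 *           flags = FLAG_KILL_ALL;     // invalidates all known addresses
 */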
65
66 /**
67  * A value: This represents a value stored at a given address in
68  * memory. Do not confuse with values from value numbering.
69  */
70 typedef struct value_t value_t;
71 struct value_t {
72         ir_node  *address;    /**< the address of this value */
73         ir_node  *value;      /**< the value itself */
74         ir_mode  *mode;       /**< the mode of the value */
75         unsigned id;          /**< address id */
76 };
77
78 /**
79  * A memop describes a memory-related operation.
80  * These are Loads/Stores and all other ops that might modify
81  * memory (Calls, CopyB) or cause exceptions.
82  */
83 typedef struct memop_t memop_t;
84 struct memop_t {
85         value_t  value;      /**< the value of this memop: only defined for Load/Store */
86         ir_node  *node;      /**< the memory op itself */
87         ir_node  *mem;       /**< the memory FROM this node */
88         ir_node  *replace;   /**< the replacement node if this memop is replaced */
89         memop_t  *next;      /**< links to the next memory op in the block in forward order. */
90         memop_t  *prev;      /**< links to the previous memory op in the block in forward order. */
91         unsigned flags;      /**< memop flags */
92         ir_node  *projs[MAX_PROJ]; /**< Projs of this memory op */
93 };
94
95 /**
96  * Additional data for every basic block.
97  */
98 typedef struct block_t block_t;
99 struct block_t {
100         memop_t  *memop_forward;     /**< topologically sorted list of memory ops in this block */
101         memop_t  *memop_backward;    /**< last memop in the list */
102         unsigned *avail_out;         /**< out-set of available addresses */
103         memop_t  **id_2_memop_avail; /**< maps avail address ids to memops */
104         unsigned *anticL_in;         /**< in-set of anticipated Load addresses */
105         memop_t  **id_2_memop_antic; /**< maps anticipated address ids to memops */
106         ir_node  *block;             /**< the associated block */
107         block_t  *forward_next;      /**< next block entry for forward iteration */
108         block_t  *backward_next;     /**< next block entry for backward iteration */
109         memop_t  *avail;             /**< used locally for the avail map */
110         memop_t  **trans_results;    /**< used to cache translated nodes during the antic calculation. */
111 };
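
/*
 * Illustrative sketch (not part of the original file): the per-block memop
 * list is doubly linked (see collect_backward() below), so a block can be
 * scanned in both directions:
 *
 *   block_t *bl = get_block_entry(block);
 *   memop_t *op;
 *   for (op = bl->memop_forward; op != NULL; op = op->next) {
 *           // forward scan, used when computing the avail sets
 *   }
 *   for (op = bl->memop_backward; op != NULL; op = op->prev) {
 *           // backward scan, used when computing the anticipated sets
 *   }
 */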
112
113 /**
114  * Metadata for this pass.
115  */
116 typedef struct ldst_env_t {
117         struct obstack  obst;              /**< obstack for temporary data */
118         ir_nodemap_t    adr_map;           /**< maps address nodes to their address_entry */
119         block_t         *forward;          /**< Inverse post-order list of all blocks Start->End */
120         block_t         *backward;         /**< Inverse post-order list of all blocks End->Start */
121         ir_node         *start_bl;         /**< start block of the current graph */
122         ir_node         *end_bl;           /**< end block of the current graph */
123         unsigned        *curr_set;         /**< current set of addresses */
124         memop_t         **curr_id_2_memop; /**< current map of address ids to memops */
125         unsigned        curr_adr_id;       /**< number for address mapping */
126         unsigned        n_mem_ops;         /**< number of memory operations (Loads/Stores) */
127         unsigned        rbs_size;          /**< size of all bitsets in bits */
128         int             max_cfg_preds;     /**< maximum number of block cfg predecessors */
129         int             changed;           /**< Flags for changed graph state */
130 #ifdef DEBUG_libfirm
131         ir_node         **id_2_address;    /**< maps an id to the used address */
132 #endif
133 } ldst_env;
134
135 /* the one and only environment */
136 static ldst_env env;
137
138 #ifdef DEBUG_libfirm
139
140 static firm_dbg_module_t *dbg;
141
142 /**
143  * Dumps the block list.
144  *
145  * @param env  the ldst environment
146  */
147 static void dump_block_list(ldst_env *env) {
148         block_t *entry;
149         memop_t *op;
150         int     i;
151
152         for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
153                 DB((dbg, LEVEL_2, "%+F {", entry->block));
154
155                 i = 0;
156                 for (op = entry->memop_forward; op != NULL; op = op->next) {
157                         if (i == 0)
158                                 DB((dbg, LEVEL_2, "\n\t"));
159                         DB((dbg, LEVEL_2, "%+F", op->node));
160                         if (op->flags & FLAG_KILL_ALL)
161                                 DB((dbg, LEVEL_2, "X"));
162                         else if (op->flags & FLAG_KILLED_NODE)
163                                 DB((dbg, LEVEL_2, "K"));
164                         DB((dbg, LEVEL_2, ", "));
165
166                         i = (i + 1) & 3;
167                 }
168                 DB((dbg, LEVEL_2, "\n}\n\n"));
169         }
170 }  /* dump_block_list */
171
172 /**
173  * Dumps the current set.
174  *
175  * @param bl   current block
176  * @param s    name of the set
177  */
178 static void dump_curr(block_t *bl, const char *s) {
179         unsigned end = env.rbs_size - 1;
180         unsigned pos;
181         int      i;
182
183         DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
184         i = 0;
185         for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
186                 memop_t *op = env.curr_id_2_memop[pos];
187
188                 if (i == 0) {
189                         DB((dbg, LEVEL_2, "\n\t"));
190                 }
191
192                 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
193                 i = (i + 1) & 3;
194         }
195         DB((dbg, LEVEL_2, "\n}\n"));
196 }  /* dump_curr */
197
198 #else
199 #define dump_block_list(env)
200 #define dump_curr(bl, s)
201 #endif /* DEBUG_libfirm */
202
203 /** Get the block entry for a block node */
204 static block_t *get_block_entry(const ir_node *block) {
205         assert(is_Block(block));
206
207         return get_irn_link(block);
208 }  /* get_block_entry */
209
210 /** Get the memop entry for a memory operation node */
211 static memop_t *get_irn_memop(const ir_node *irn) {
212         assert(! is_Block(irn));
213         return get_irn_link(irn);
214 }  /* get_irn_memop */
215
216 /**
217  * Walk over the memory edges from definition to users.
218  * This ensures that even operations without a memory output are found.
219  *
220  * @param irn   start node
221  * @param pre   pre walker function
222  * @param post  post walker function
223  * @param ctx   context parameter for the walker functions
224  */
225 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
226         int     i;
227         ir_mode *mode;
228
229         mark_irn_visited(irn);
230
231         if (pre)
232                 pre(irn, ctx);
233
234         mode = get_irn_mode(irn);
235         if (mode == mode_M) {
236                 /* every successor uses memory */
237                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
238                         ir_node *succ = get_irn_out(irn, i);
239
240                         if (! irn_visited(succ))
241                                 walk_memory(succ, pre, post, ctx);
242                 }
243         } else if (mode == mode_T) {
244                 /* only some Projs use memory */
245                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
246                         ir_node *proj = get_irn_out(irn, i);
247
248                         if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
249                                 walk_memory(proj, pre, post, ctx);
250                 }
251         }
252         if (post)
253                 post(irn, ctx);
254 }  /* walk_memory */
255
256 /**
257  * Walks over all memory nodes of a graph.
258  *
259  * @param irg   a graph
260  * @param pre   pre walker function
261  * @param post  post walker function
262  * @param ctx   context parameter for the walker functions
263  */
264 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
265         inc_irg_visited(irg);
266
267         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
268
269         /*
270          * there are two possible sources for memory: initial_mem and nomem
271          * we ignore nomem as this should NOT change the memory
272          */
273         walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
274
275         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
276 }  /* walk_memory_irg */
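
/*
 * Hypothetical usage (not taken from this file's call sites): the walker
 * visits every node on the memory chain exactly once, starting at the
 * initial memory, e.g.
 *
 *   walk_memory_irg(irg, NULL, collect_memops, NULL);
 *
 * where collect_memops() is the post-walker defined further below.
 */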
277
278 /**
279  * Register an address and allocate a dense (0..n) ID for it.
280  *
281  * @param adr  the IR-node representing the address
282  *
283  * @return the allocated id
284  */
285 static unsigned register_address(ir_node *adr) {
286         address_entry *entry;
287
288         /* skip Confirms and Casts */
289 restart:
290         if (is_Confirm(adr)) {
291                 adr = get_Confirm_value(adr);
292                 goto restart;
293         }
294         if (is_Cast(adr)) {
295                 adr = get_Cast_op(adr);
296                 goto restart;
297         }
298
299         entry = ir_nodemap_get(&env.adr_map, adr);
300
301         if (entry == NULL) {
302                 /* new address */
303                 entry = obstack_alloc(&env.obst, sizeof(*entry));
304
305                 entry->id = env.curr_adr_id++;
306                 ir_nodemap_insert(&env.adr_map, adr, entry);
307
308                 DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
309 #ifdef DEBUG_libfirm
310                 ARR_APP1(ir_node *, env.id_2_address, adr);
311 #endif
312         }
313         return entry->id;
314 }  /* register_address */
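
/*
 * Illustrative sketch (not part of the original file): register_address()
 * hands out one ID per distinct address node and returns the same ID on
 * every further call, so the IDs can be used directly as bit positions:
 *
 *   unsigned id1 = register_address(adr);
 *   unsigned id2 = register_address(adr);   // same address node again
 *   assert(id1 == id2);                     // the mapping is stable
 *   rbitset_set(env.curr_set, id1);         // usable as a bitset index
 */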
315
316
317 /**
318  * translate an address through a Phi node into a given predecessor
319  * block.
320  *
321  * @param address  the address
322  * @param block    the block
323  * @param pos      the position of the predecessor in block
324  */
325 static ir_node *phi_translate(ir_node *address, const ir_node *block, int pos) {
326         if (is_Phi(address) && get_nodes_block(address) == block)
327                 address = get_Phi_pred(address, pos);
328         return address;
329 }  /* phi_translate */
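
/*
 * Illustrative example (not part of the original file, names are made up):
 * only a Phi address that lives in the given block is rewritten; every
 * other address is returned unchanged.
 *
 *   // adr = Phi(a0, a1) in block B with predecessors P0 and P1
 *   ir_node *in_p0 = phi_translate(adr, B, 0);   // yields a0
 *   ir_node *in_p1 = phi_translate(adr, B, 1);   // yields a1
 *   ir_node *other = phi_translate(a0,  B, 0);   // a0 is no Phi in B -> a0
 */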
330
331 /**
332  * Walker: allocate a block entry for every block
333  * and register all potential addresses.
334  */
335 static void prepare_blocks(ir_node *irn, void *ctx) {
336         (void)ctx;
337
338         if (is_Block(irn)) {
339                 block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
340                 int     n;
341
342                 entry->memop_forward    = NULL;
343                 entry->memop_backward   = NULL;
344                 entry->avail_out        = NULL;
345                 entry->id_2_memop_avail = NULL;
346                 entry->anticL_in        = NULL;
347                 entry->id_2_memop_antic = NULL;
348                 entry->block            = irn;
349                 entry->forward_next     = NULL;
350                 entry->backward_next    = NULL;
351                 entry->avail            = NULL;
352                 entry->trans_results    = NULL;
353                 set_irn_link(irn, entry);
354
355                 set_Block_phis(irn, NULL);
356
357                 /* use block marks to track unreachable blocks */
358                 set_Block_mark(irn, 0);
359
360                 n = get_Block_n_cfgpreds(irn);
361                 if (n > env.max_cfg_preds)
362                         env.max_cfg_preds = n;
363         } else {
364                 ir_mode *mode = get_irn_mode(irn);
365
366                 if (mode_is_reference(mode)) {
367                         /*
368                          * Register ALL possible addresses: this is overkill for now but
369                          * simpler than doing it only for the translated addresses
370                          * (which would be sufficient at the moment).
371                          */
372                         (void)register_address(irn);
373                 }
374         }
375 }  /* prepare_blocks */
376
377 /**
378  * Post-Walker, link in all Phi's
379  */
380 static void link_phis(ir_node *irn, void *ctx) {
381         (void)ctx;
382
383         if (is_Phi(irn)) {
384                 ir_node *block = get_nodes_block(irn);
385                 add_Block_phi(block, irn);
386         }
387 }  /* link_phis */
388
389 /**
390  * Block walker: creates the inverse post-order list for the CFG.
391  */
392 static void inverse_post_order(ir_node *block, void *ctx) {
393         block_t *entry = get_block_entry(block);
394
395         (void)ctx;
396
397         /* mark this block IS reachable from start */
398         set_Block_mark(block, 1);
399
400         /* create the list in inverse order */
401         entry->forward_next = env.forward;
402         env.forward         = entry;
403
404         /* remember the first visited (last in list) entry, needed for later */
405         if (env.backward == NULL)
406                 env.backward = entry;
407 }  /* inverse_post_order */
408
409 /**
410  * Block walker: create backward links for the memops of a block.
411  */
412 static void collect_backward(ir_node *block, void *ctx) {
413         block_t *entry = get_block_entry(block);
414         memop_t *last, *op;
415
416         (void)ctx;
417
418         /*
419          * Do NOT link in the end block yet. We want it to be
420          * the first in the list. This is NOT guaranteed by the walker
421          * if we have endless loops.
422          */
423         if (block != env.end_bl) {
424                 entry->backward_next = env.backward;
425
426                 /* create the list in inverse order */
427                 env.backward = entry;
428         }
429
430         /* create backward links for all memory ops */
431         last = NULL;
432         for (op = entry->memop_forward; op != NULL; op = op->next) {
433                 op->prev = last;
434                 last     = op;
435         }
436         entry->memop_backward = last;
437 }  /* collect_backward */
438
439 /**
440  * Allocate a memop.
441  *
442  * @param irn  the IR-node representing the memop or NULL
443  *             if this is a translated (virtual) memop
444  *
445  * @return the allocated memop
446  */
447 static memop_t *alloc_memop(ir_node *irn) {
448         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
449
450         m->value.address = NULL;
451         m->value.value   = NULL;
452         m->value.mode    = NULL;
453
454         m->node          = irn;
455         m->mem           = NULL;
456         m->replace       = NULL;
457         m->next          = NULL;
458         m->flags         = 0;
459
460         memset(m->projs, 0, sizeof(m->projs));
461
462         if (irn != NULL)
463                 set_irn_link(irn, m);
464         return m;
465 }  /* alloc_memop */
466
467 /**
468  * Create a memop for a Phi-replacement.
469  *
470  * @param op   the memop to clone
471  * @param phi  the Phi-node representing the new value
472  */
473 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
474         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
475
476         m->value         = op->value;
477         m->value.value   = phi;
478
479         m->node          = phi;
480         m->replace       = NULL;
481         m->next          = NULL;
482         m->flags         = 0;
483
484         set_irn_link(phi, m);
485         return m;
486 }  /* clone_memop_phi */
487
488 /**
489  * Return the memory properties of a call node.
490  *
491  * @param call  the call node
492  *
493  * @return a bitset of mtp_property_const and mtp_property_pure
494  */
495 static unsigned get_Call_memory_properties(ir_node *call) {
496         ir_type *call_tp = get_Call_type(call);
497         unsigned prop = get_method_additional_properties(call_tp);
498
499         /* check first the call type */
500         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
501                 /* try the called entity */
502                 ir_node *ptr = get_Call_ptr(call);
503
504                 if (is_Global(ptr)) {
505                         ir_entity *ent = get_Global_entity(ptr);
506
507                         prop = get_entity_additional_properties(ent);
508                 }
509         }
510         return prop & (mtp_property_const|mtp_property_pure);
511 }  /* get_Call_memory_properties */
512
513 /**
514  * Returns an entity if the address ptr points to a constant one.
515  *
516  * @param ptr  the address
517  *
518  * @return an entity or NULL
519  */
520 static ir_entity *find_constant_entity(ir_node *ptr) {
521         for (;;) {
522                 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
523                         return get_SymConst_entity(ptr);
524                 } else if (is_Sel(ptr)) {
525                         ir_entity *ent = get_Sel_entity(ptr);
526                         ir_type   *tp  = get_entity_owner(ent);
527
528                         /* Do not fiddle with polymorphism. */
529                         if (is_Class_type(get_entity_owner(ent)) &&
530                                 ((get_entity_n_overwrites(ent)    != 0) ||
531                                 (get_entity_n_overwrittenby(ent) != 0)   ) )
532                                 return NULL;
533
534                         if (is_Array_type(tp)) {
535                                 /* check bounds */
536                                 int i, n;
537
538                                 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
539                                         ir_node *bound;
540                                         tarval *tlower, *tupper;
541                                         ir_node *index = get_Sel_index(ptr, i);
542                                         tarval *tv     = computed_value(index);
543
544                                         /* check if the index is constant */
545                                         if (tv == tarval_bad)
546                                                 return NULL;
547
548                                         bound  = get_array_lower_bound(tp, i);
549                                         tlower = computed_value(bound);
550                                         bound  = get_array_upper_bound(tp, i);
551                                         tupper = computed_value(bound);
552
553                                         if (tlower == tarval_bad || tupper == tarval_bad)
554                                                 return NULL;
555
556                                         if (tarval_cmp(tv, tlower) & pn_Cmp_Lt)
557                                                 return NULL;
558                                         if (tarval_cmp(tupper, tv) & pn_Cmp_Lt)
559                                                 return NULL;
560
561                                         /* ok, bounds check finished */
562                                 }
563                         }
564
565                         if (variability_constant == get_entity_variability(ent))
566                                 return ent;
567
568                         /* try next */
569                         ptr = get_Sel_ptr(ptr);
570                 } else if (is_Add(ptr)) {
571                         ir_node *l = get_Add_left(ptr);
572                         ir_node *r = get_Add_right(ptr);
573
574                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
575                                 ptr = l;
576                         else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
577                                 ptr = r;
578                         else
579                                 return NULL;
580
581                         /* for now, we support only one addition, reassoc should fold all others */
582                         if (! is_SymConst(ptr) && !is_Sel(ptr))
583                                 return NULL;
584                 } else if (is_Sub(ptr)) {
585                         ir_node *l = get_Sub_left(ptr);
586                         ir_node *r = get_Sub_right(ptr);
587
588                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
589                                 ptr = l;
590                         else
591                                 return NULL;
592                         /* for now, we support only one subtraction, reassoc should fold all others */
593                         if (! is_SymConst(ptr) && !is_Sel(ptr))
594                                 return NULL;
595                 } else
596                         return NULL;
597         }
598 }  /* find_constant_entity */
599
600 /**
601  * Return the Selection index of a Sel node from dimension n
602  */
603 static long get_Sel_array_index_long(ir_node *n, int dim) {
604         ir_node *index = get_Sel_index(n, dim);
605         assert(is_Const(index));
606         return get_tarval_long(get_Const_tarval(index));
607 }  /* get_Sel_array_index_long */
608
609 /**
610  * Returns the accessed component graph path for a
611  * node computing an address.
612  *
613  * @param ptr    the node computing the address
614  * @param depth  current depth in steps upward from the root
615  *               of the address
616  */
617 static compound_graph_path *rec_get_accessed_path(ir_node *ptr, int depth) {
618         compound_graph_path *res = NULL;
619         ir_entity           *root, *field, *ent;
620         int                 path_len, pos, idx;
621         tarval              *tv;
622         ir_type             *tp;
623
624         if (is_SymConst(ptr)) {
625                 /* a SymConst. If the depth is 0, this is an access to a global
626                  * entity and we don't need a component path, else we know
627                  * at least its length.
628                  */
629                 assert(get_SymConst_kind(ptr) == symconst_addr_ent);
630                 root = get_SymConst_entity(ptr);
631                 res = (depth == 0) ? NULL : new_compound_graph_path(get_entity_type(root), depth);
632         } else if (is_Sel(ptr)) {
633                 /* it's a Sel, go up until we find the root */
634                 res = rec_get_accessed_path(get_Sel_ptr(ptr), depth+1);
635                 if (res == NULL)
636                         return NULL;
637
638                 /* fill up the step in the path at the current position */
639                 field    = get_Sel_entity(ptr);
640                 path_len = get_compound_graph_path_length(res);
641                 pos      = path_len - depth - 1;
642                 set_compound_graph_path_node(res, pos, field);
643
644                 if (is_Array_type(get_entity_owner(field))) {
645                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
646                         set_compound_graph_path_array_index(res, pos, get_Sel_array_index_long(ptr, 0));
647                 }
648         } else if (is_Add(ptr)) {
649                 ir_node *l    = get_Add_left(ptr);
650                 ir_node *r    = get_Add_right(ptr);
651                 ir_mode *mode = get_irn_mode(ptr);
652                 tarval  *tmp;
653
654                 if (is_Const(r) && get_irn_mode(l) == mode) {
655                         ptr = l;
656                         tv  = get_Const_tarval(r);
657                 } else {
658                         ptr = r;
659                         tv  = get_Const_tarval(l);
660                 }
661 ptr_arith:
662                 mode = get_tarval_mode(tv);
663                 tmp  = tv;
664
665                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
666                 if (is_Sel(ptr)) {
667                         field = get_Sel_entity(ptr);
668                 } else {
669                         field = get_SymConst_entity(ptr);
670                 }
671                 idx = 0;
672                 for (ent = field;;) {
673                         unsigned size;
674                         tarval   *sz, *tv_index, *tlower, *tupper;
675                         ir_node  *bound;
676
677                         tp = get_entity_type(ent);
678                         if (! is_Array_type(tp))
679                                 break;
680                         ent = get_array_element_entity(tp);
681                         size = get_type_size_bytes(get_entity_type(ent));
682                         sz   = new_tarval_from_long(size, mode);
683
684                         tv_index = tarval_div(tmp, sz);
685                         tmp      = tarval_mod(tmp, sz);
686
687                         if (tv_index == tarval_bad || tmp == tarval_bad)
688                                 return NULL;
689
690                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
691                         bound  = get_array_lower_bound(tp, 0);
692                         tlower = computed_value(bound);
693                         bound  = get_array_upper_bound(tp, 0);
694                         tupper = computed_value(bound);
695
696                         if (tlower == tarval_bad || tupper == tarval_bad)
697                                 return NULL;
698
699                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
700                                 return NULL;
701                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
702                                 return NULL;
703
704                         /* ok, bounds check finished */
705                         ++idx;
706                 }
707                 if (! tarval_is_null(tmp)) {
708                         /* access to some struct/union member */
709                         return NULL;
710                 }
711
712                 /* should be at least ONE array */
713                 if (idx == 0)
714                         return NULL;
715
716                 res = rec_get_accessed_path(ptr, depth + idx);
717                 if (res == NULL)
718                         return NULL;
719
720                 path_len = get_compound_graph_path_length(res);
721                 pos      = path_len - depth - idx;
722
723                 for (ent = field;;) {
724                         unsigned size;
725                         tarval   *sz, *tv_index;
726                         long     index;
727
728                         tp = get_entity_type(ent);
729                         if (! is_Array_type(tp))
730                                 break;
731                         ent = get_array_element_entity(tp);
732                         set_compound_graph_path_node(res, pos, ent);
733
734                         size = get_type_size_bytes(get_entity_type(ent));
735                         sz   = new_tarval_from_long(size, mode);
736
737                         tv_index = tarval_div(tv, sz);
738                         tv       = tarval_mod(tv, sz);
739
740                         /* worked above, should work again */
741                         assert(tv_index != tarval_bad && tv != tarval_bad);
742
743                         /* bounds already checked above */
744                         index = get_tarval_long(tv_index);
745                         set_compound_graph_path_array_index(res, pos, index);
746                         ++pos;
747                 }
748         } else if (is_Sub(ptr)) {
749                 ir_node *l = get_Sub_left(ptr);
750                 ir_node *r = get_Sub_right(ptr);
751
752                 ptr = l;
753                 tv  = get_Const_tarval(r);
754                 tv  = tarval_neg(tv);
755                 goto ptr_arith;
756         }
757         return res;
758 }  /* rec_get_accessed_path */
759
760 /**
761  * Returns an access path or NULL.  The access path is only
762  * valid if the graph is in phase_high and _no_ address computation is used.
763  */
764 static compound_graph_path *get_accessed_path(ir_node *ptr) {
765         compound_graph_path *gr = rec_get_accessed_path(ptr, 0);
766         return gr;
767 }  /* get_accessed_path */
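
/*
 * Worked example (illustrative, not part of the original file): for a
 * source-level access like  s.arr[3].x  rec_get_accessed_path() walks the
 * Sel chain up to the SymConst of 's' and records the selected entities
 * ('arr' with array index 3, then 'x') in a compound graph path.  Such a
 * path is later fed to get_compound_ent_value_by_path() to look up the
 * value in the entity's (old style) initializer.
 */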
768
769 typedef struct path_entry {
770         ir_entity         *ent;
771         struct path_entry *next;
772         long              index;
773 } path_entry;
774
775 static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next) {
776         path_entry       entry, *p;
777         ir_entity        *ent, *field;
778         ir_initializer_t *initializer;
779         tarval           *tv;
780         ir_type          *tp;
781         unsigned         n;
782
783         entry.next = next;
784         if (is_SymConst(ptr)) {
785                 /* found the root */
786                 ent         = get_SymConst_entity(ptr);
787                 initializer = get_entity_initializer(ent);
788                 for (p = next; p != NULL;) {
789                         if (initializer->kind != IR_INITIALIZER_COMPOUND)
790                                 return NULL;
791                         n  = get_initializer_compound_n_entries(initializer);
792                         tp = get_entity_type(ent);
793
794                         if (is_Array_type(tp)) {
795                                 ent = get_array_element_entity(tp);
796                                 if (ent != p->ent) {
797                                         /* a missing [0] */
798                                         if (0 >= n)
799                                                 return NULL;
800                                         initializer = get_initializer_compound_value(initializer, 0);
801                                         continue;
802                                 }
803                         }
804                         if (p->index >= (int) n)
805                                 return NULL;
806                         initializer = get_initializer_compound_value(initializer, p->index);
807
808                         ent = p->ent;
809                         p   = p->next;
810                 }
811                 tp = get_entity_type(ent);
812                 while (is_Array_type(tp)) {
813                         ent = get_array_element_entity(tp);
814                         tp = get_entity_type(ent);
815                         /* a missing [0] */
816                         n  = get_initializer_compound_n_entries(initializer);
817                         if (0 >= n)
818                                 return NULL;
819                         initializer = get_initializer_compound_value(initializer, 0);
820                 }
821
822                 switch (initializer->kind) {
823                 case IR_INITIALIZER_CONST:
824                         return get_initializer_const_value(initializer);
825                 case IR_INITIALIZER_TARVAL:
826                 case IR_INITIALIZER_NULL:
827                 default:
828                         return NULL;
829                 }
830         } else if (is_Sel(ptr)) {
831                 entry.ent = field = get_Sel_entity(ptr);
832                 tp = get_entity_owner(field);
833                 if (is_Array_type(tp)) {
834                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
835                         entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
836                 } else {
837                         int i, n_members = get_compound_n_members(tp);
838                         for (i = 0; i < n_members; ++i) {
839                                 if (get_compound_member(tp, i) == field)
840                                         break;
841                         }
842                         if (i >= n_members) {
843                                 /* not found: should NOT happen */
844                                 return NULL;
845                         }
846                         entry.index = i;
847                 }
848                 return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
849         }  else if (is_Add(ptr)) {
850                 ir_node  *l = get_Add_left(ptr);
851                 ir_node  *r = get_Add_right(ptr);
852                 ir_mode  *mode;
853                 unsigned pos;
854
855                 if (is_Const(r)) {
856                         ptr = l;
857                         tv  = get_Const_tarval(r);
858                 } else {
859                         ptr = r;
860                         tv  = get_Const_tarval(l);
861                 }
862 ptr_arith:
863                 mode = get_tarval_mode(tv);
864
865                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
866                 if (is_Sel(ptr)) {
867                         field = get_Sel_entity(ptr);
868                 } else {
869                         field = get_SymConst_entity(ptr);
870                 }
871
872                 /* count needed entries */
873                 pos = 0;
874                 for (ent = field;;) {
875                         tp = get_entity_type(ent);
876                         if (! is_Array_type(tp))
877                                 break;
878                         ent = get_array_element_entity(tp);
879                         ++pos;
880                 }
881                 /* should be at least ONE entry */
882                 if (pos == 0)
883                         return NULL;
884
885                 /* allocate the right number of entries */
886                 NEW_ARR_A(path_entry, p, pos);
887
888                 /* fill them up */
889                 pos = 0;
890                 for (ent = field;;) {
891                         unsigned size;
892                         tarval   *sz, *tv_index, *tlower, *tupper;
893                         long     index;
894                         ir_node  *bound;
895
896                         tp = get_entity_type(ent);
897                         if (! is_Array_type(tp))
898                                 break;
899                         ent = get_array_element_entity(tp);
900                         p[pos].ent  = ent;
901                         p[pos].next = &p[pos + 1];
902
903                         size = get_type_size_bytes(get_entity_type(ent));
904                         sz   = new_tarval_from_long(size, mode);
905
906                         tv_index = tarval_div(tv, sz);
907                         tv       = tarval_mod(tv, sz);
908
909                         if (tv_index == tarval_bad || tv == tarval_bad)
910                                 return NULL;
911
912                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
913                         bound  = get_array_lower_bound(tp, 0);
914                         tlower = computed_value(bound);
915                         bound  = get_array_upper_bound(tp, 0);
916                         tupper = computed_value(bound);
917
918                         if (tlower == tarval_bad || tupper == tarval_bad)
919                                 return NULL;
920
921                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
922                                 return NULL;
923                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
924                                 return NULL;
925
926                         /* ok, bounds check finished */
927                         index = get_tarval_long(tv_index);
928                         p[pos].index = index;
929                         ++pos;
930                 }
931                 if (! tarval_is_null(tv)) {
932                         /* hmm, wrong access */
933                         return NULL;
934                 }
935                 p[pos - 1].next = next;
936                 return rec_find_compound_ent_value(ptr, p);
937         } else if (is_Sub(ptr)) {
938                 ir_node *l = get_Sub_left(ptr);
939                 ir_node *r = get_Sub_right(ptr);
940
941                 ptr = l;
942                 tv  = get_Const_tarval(r);
943                 tv  = tarval_neg(tv);
944                 goto ptr_arith;
945         }
946         return NULL;
947 }  /* rec_find_compound_ent_value */
948
949 static ir_node *find_compound_ent_value(ir_node *ptr) {
950         return rec_find_compound_ent_value(ptr, NULL);
951 }  /* find_compound_ent_value */
952
953 /**
954  * Mark a Load memop to be replaced by a definition.
955  *
956  * @param op  the Load memop
957  */
958 static void mark_replace_load(memop_t *op, ir_node *def) {
959         op->replace = def;
960         op->flags |= FLAG_KILLED_NODE;
961         env.changed = 1;
962 }  /* mark_replace_load */
963
964 /**
965  * Mark a Store memop to be removed.
966  *
967  * @param op  the Store memop
968  */
969 static void mark_remove_store(memop_t *op) {
970         op->flags |= FLAG_KILLED_NODE;
971         env.changed = 1;
972 }  /* mark_remove_store */
973
974 /**
975  * Update a memop for a Load.
976  *
977  * @param m  the memop
978  */
979 static void update_Load_memop(memop_t *m) {
980         int       i;
981         ir_node   *load = m->node;
982         ir_node   *ptr;
983         ir_entity *ent;
984
985         if (get_Load_volatility(load) == volatility_is_volatile)
986                 m->flags |= FLAG_IGNORE;
987
988         ptr = get_Load_ptr(load);
989
990         m->value.address = ptr;
991
992         for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
993                 ir_node *proj = get_irn_out(load, i);
994                 long    pn;
995
996                 /* beware of keep edges */
997                 if (is_End(proj))
998                         continue;
999
1000                 pn = get_Proj_proj(proj);
1001                 m->projs[pn] = proj;
1002                 switch (pn) {
1003                 case pn_Load_res:
1004                         m->value.value = proj;
1005                         m->value.mode  = get_irn_mode(proj);
1006                         break;
1007                 case pn_Load_X_except:
1008                         m->flags |= FLAG_EXCEPTION;
1009                         break;
1010                 case pn_Load_M:
1011                         m->mem = proj;
1012                         break;
1013                 case pn_Load_X_regular:
1014                         break;
1015                 default:
1016                         panic("Unsupported Proj from Load %+F", proj);
1017                 }
1018         }
1019
1020         /* check if we can determine the entity that will be loaded */
1021         ent = find_constant_entity(ptr);
1022
1023         if (ent != NULL                                     &&
1024                 allocation_static == get_entity_allocation(ent) &&
1025                 visibility_external_allocated != get_entity_visibility(ent)) {
1026                 /* a static allocation that is not external: there should be NO exception
1027                  * when loading even if we cannot replace the load itself. */
1028                 ir_node *value = NULL;
1029
1030                 /* no exception, clear the m fields as it might be checked later again */
1031                 if (m->projs[pn_Load_X_except]) {
1032                         exchange(m->projs[pn_Load_X_except], new_Bad());
1033                         m->projs[pn_Load_X_except] = NULL;
1034                         m->flags &= ~FLAG_EXCEPTION;
1035                         env.changed = 1;
1036                 }
1037                 if (m->projs[pn_Load_X_regular]) {
1038                         exchange(m->projs[pn_Load_X_regular], new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1039                         m->projs[pn_Load_X_regular] = NULL;
1040                         env.changed = 1;
1041                 }
1042
1043                 if (variability_constant == get_entity_variability(ent)) {
1044                         if (is_atomic_entity(ent)) {
1045                                 /* Might not be atomic after lowering of Sels.  In this case we
1046                                  * could also load, but it's more complicated. */
1047                                  * simpler case: we load the content of a constant value:
1048                                  * replace it by the constant itself */
1049                                 value = get_atomic_ent_value(ent);
1050                         } else if (ent->has_initializer) {
1051                                 /* new style initializer */
1052                                 value = find_compound_ent_value(ptr);
1053                         } else {
1054                                 /* old style initializer */
1055                                 compound_graph_path *path = get_accessed_path(ptr);
1056
1057                                 if (path != NULL) {
1058                                         assert(is_proper_compound_graph_path(path, get_compound_graph_path_length(path)-1));
1059
1060                                         value = get_compound_ent_value_by_path(ent, path);
1061                                         DB((dbg, LEVEL_1, "  Constant access at %F%F resulted in %+F\n", ent, path, value));
1062                                         free_compound_graph_path(path);
1063                                 }
1064                         }
1065                         if (value != NULL)
1066                                 value = can_replace_load_by_const(load, value);
1067                 }
1068
1069                 if (value != NULL) {
1070                         /* we completely replace the load by this value */
1071                         DB((dbg, LEVEL_1, "Replacing Load %+F by constant %+F\n", m->node, value));
1072                         mark_replace_load(m, value);
1073                         return;
1074                 }
1075         }
1076
1077         if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
1078                 /* only create an address if this node is NOT killed immediately or ignored */
1079                 m->value.id = register_address(ptr);
1080                 ++env.n_mem_ops;
1081         } else {
1082                 /* no user, KILL it */
1083                 mark_replace_load(m, NULL);
1084         }
1085 }  /* update_Load_memop */
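
/*
 * Hedged example (not part of the original file): the constant folding above
 * means a Load from a statically allocated, constant, atomic entity is
 * replaced by the entity's value, e.g. for a program fragment like
 *
 *   static const int answer = 42;
 *   int f(void) { return answer; }
 *
 * the Load of 'answer' is marked via mark_replace_load() and later rewired
 * to the Const 42.
 */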
1086
1087 /**
1088  * Update a memop for a Store.
1089  *
1090  * @param m  the memop
1091  */
1092 static void update_Store_memop(memop_t *m) {
1093         int     i;
1094         ir_node *store = m->node;
1095         ir_node *adr   = get_Store_ptr(store);
1096
1097         if (get_Store_volatility(store) == volatility_is_volatile) {
1098                 m->flags |= FLAG_IGNORE;
1099         } else {
1100                 /* only create an address if this node is NOT ignored */
1101                 m->value.id = register_address(adr);
1102                 ++env.n_mem_ops;
1103         }
1104
1105         m->value.address = adr;
1106
1107         for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1108                 ir_node *proj = get_irn_out(store, i);
1109                 long    pn;
1110
1111                 /* beware of keep edges */
1112                 if (is_End(proj))
1113                         continue;
1114
1115                 pn = get_Proj_proj(proj);
1116                 m->projs[pn] = proj;
1117                 switch (pn) {
1118                 case pn_Store_X_except:
1119                         m->flags |= FLAG_EXCEPTION;
1120                         break;
1121                 case pn_Store_M:
1122                         m->mem = proj;
1123                         break;
1124                 case pn_Store_X_regular:
1125                         break;
1126                 default:
1127                         panic("Unsupported Proj from Store %+F", proj);
1128                 }
1129         }
1130         m->value.value = get_Store_value(store);
1131         m->value.mode  = get_irn_mode(m->value.value);
1132 }  /* update_Store_memop */
1133
1134 /**
1135  * Update a memop for a Call.
1136  *
1137  * @param m  the memop
1138  */
1139 static void update_Call_memop(memop_t *m) {
1140         ir_node  *call = m->node;
1141         unsigned prop  = get_Call_memory_properties(call);
1142         int      i;
1143
1144         if (prop & mtp_property_const) {
1145                 /* A constant call does NOT use memory at all, so we
1146                    can kick it from the list. */
1147         } else if (prop & mtp_property_pure) {
1148                 /* pure calls READ memory */
1149                 m->flags = 0;
1150         } else
1151                 m->flags = FLAG_KILL_ALL;
1152
1153         for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
1154                 ir_node *proj = get_irn_out(call, i);
1155
1156                 /* beware of keep edges */
1157                 if (is_End(proj))
1158                         continue;
1159
1160                 switch (get_Proj_proj(proj)) {
1161                 case pn_Call_X_except:
1162                         m->flags |= FLAG_EXCEPTION;
1163                         break;
1164                 case pn_Call_M_regular:
1165                         m->mem = proj;
1166                         break;
1167                 }
1168         }
1169 }  /* update_Call_memop */
1170
1171 /**
1172  * Update a memop for a Div/Mod/Quot/DivMod.
1173  *
1174  * @param m  the memop
1175  */
1176 static void update_DivOp_memop(memop_t *m) {
1177         ir_node *div = m->node;
1178         int     i;
1179
1180         for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
1181                 ir_node *proj = get_irn_out(div, i);
1182
1183                 /* beware of keep edges */
1184                 if (is_End(proj))
1185                         continue;
1186
1187                 switch (get_Proj_proj(proj)) {
1188                 case pn_Generic_X_except:
1189                         m->flags |= FLAG_EXCEPTION;
1190                         break;
1191                 case pn_Generic_M_regular:
1192                         m->mem = proj;
1193                         break;
1194                 }
1195         }
1196 }  /* update_DivOp_memop */
1197
1198 /**
1199  * Update a memop for a Phi.
1200  *
1201  * @param m  the memop
1202  */
1203 static void update_Phi_memop(memop_t *m) {
1204         /* the Phi is its own mem */
1205         m->mem = m->node;
1206 }  /* update_Phi_memop */
1207
1208 /**
1209  * Memory walker: collect all memory ops and build topological lists.
1210  */
1211 static void collect_memops(ir_node *irn, void *ctx) {
1212         memop_t  *op;
1213         ir_node  *block;
1214         block_t  *entry;
1215
1216         (void) ctx;
1217         if (is_Proj(irn)) {
1218                 /* we can safely ignore ProjM's except the initial memory */
1219                 if (irn != get_irg_initial_mem(current_ir_graph))
1220                         return;
1221         }
1222
1223         op    = alloc_memop(irn);
1224         block = get_nodes_block(irn);
1225         entry = get_block_entry(block);
1226
1227         if (is_Phi(irn)) {
1228                 update_Phi_memop(op);
1229                 /* Phis must always be placed first */
1230                 op->next = entry->memop_forward;
1231                 entry->memop_forward = op;
1232                 if (entry->memop_backward == NULL)
1233                         entry->memop_backward = op;
1234         } else {
1235                 switch (get_irn_opcode(irn)) {
1236                 case iro_Load:
1237                         update_Load_memop(op);
1238                         break;
1239                 case iro_Store:
1240                         update_Store_memop(op);
1241                         break;
1242                 case iro_Call:
1243                         update_Call_memop(op);
1244                         break;
1245                 case iro_Sync:
1246                 case iro_Pin:
1247                         op->mem = irn;
1248                         break;
1249                 case iro_Proj:
1250                         /* initial memory */
1251                         op->mem = irn;
1252                         break;
1253                 case iro_Return:
1254                 case iro_End:
1255                         /* we can use those to find the memory edge */
1256                         break;
1257                 case iro_Div:
1258                 case iro_DivMod:
1259                 case iro_Quot:
1260                 case iro_Mod:
1261                         update_DivOp_memop(op);
1262                         break;
1263
1264                 case iro_Builtin:
1265                         /* TODO: handle some builtins */
1266                 default:
1267                         /* unsupported operation */
1268                         op->flags = FLAG_KILL_ALL;
1269                 }
1270
1271
1272                 /* all others should be placed last */
1273                 if (entry->memop_backward == NULL) {
1274                         entry->memop_forward = entry->memop_backward = op;
1275                 } else {
1276                         entry->memop_backward->next = op;
1277                         entry->memop_backward       = op;
1278                 }
1279         }
1280 }  /* collect_memops */
1281
1282 /**
1283  * Find an address in the current set.
1284  *
1285  * @param value  the value to be searched for
1286  *
1287  * @return a memop for the value or NULL if the value does
1288  *         not exist in the set or cannot be converted into
1289  *         the requested mode
1290  */
1291 static memop_t *find_address(const value_t *value) {
1292         if (rbitset_is_set(env.curr_set, value->id)) {
1293                 memop_t *res = env.curr_id_2_memop[value->id];
1294
1295                 if (res->value.mode == value->mode)
1296                         return res;
1297                 /* allow hidden casts */
1298                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1299                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1300                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1301                         return res;
1302         }
1303         return NULL;
1304 }  /* find_address */
1305
1306 /**
1307  * Find an address in the avail_out set.
1308  *
1309  * @param bl     the block
1310  */
1311 static memop_t *find_address_avail(const block_t *bl, unsigned id, const ir_mode *mode) {
1312         if (rbitset_is_set(bl->avail_out, id)) {
1313                 memop_t *res = bl->id_2_memop_avail[id];
1314
1315                 if (res->value.mode == mode)
1316                         return res;
1317                 /* allow hidden casts */
1318                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1319                     get_mode_arithmetic(mode) == irma_twos_complement &&
1320                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(mode))
1321                         return res;
1322         }
1323         return NULL;
1324 }  /* find_address_avail */
1325
1326 /**
1327  * Kill all addresses from the current set.
1328  */
1329 static void kill_all(void) {
1330         rbitset_clear_all(env.curr_set, env.rbs_size);
1331
1332         /* set sentinel */
1333         rbitset_set(env.curr_set, env.rbs_size - 1);
1334 }  /* kill_all */
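
/*
 * Note (illustrative, not part of the original file): the sentinel bit at
 * position env.rbs_size - 1 stays set even after kill_all(), so
 * rbitset_next() always finds a set bit and iterations over the set can
 * simply stop before that position:
 *
 *   unsigned end = env.rbs_size - 1;
 *   unsigned pos;
 *   for (pos = rbitset_next(env.curr_set, 0, 1); pos < end;
 *        pos = rbitset_next(env.curr_set, pos + 1, 1)) {
 *           // pos is the id of an address still in the set
 *   }
 */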
1335
1336 /**
1337  * Kill all memops from the current set whose address may alias the given Store value.
1338  *
1339  * @param value  the Store value
1340  */
1341 static void kill_memops(const value_t *value) {
1342         unsigned end = env.rbs_size - 1;
1343         unsigned pos;
1344
1345         for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1346                 memop_t *op = env.curr_id_2_memop[pos];
1347
1348                 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1349                                                           op->value.address, op->value.mode)) {
1350                         rbitset_clear(env.curr_set, pos);
1351                         env.curr_id_2_memop[pos] = NULL;
1352                         DB((dbg, LEVEL_2, "KILLING %+F because of possible alias address %+F\n", op->node, value->address));
1353                 }
1354         }
1355 }  /* kill_memops */
1356
1357 /**
1358  * Add the value of a memop to the current set.
1359  *
1360  * @param op  the memory op
1361  */
1362 static void add_memop(memop_t *op) {
1363         rbitset_set(env.curr_set, op->value.id);
1364         env.curr_id_2_memop[op->value.id] = op;
1365 }  /* add_memop */
1366
1367 /**
1368  * Add the value of a memop to the avail_out set.
1369  *
1370  * @param bl  the block
1371  * @param op  the memory op
1372  */
1373 static void add_memop_avail(block_t *bl, memop_t *op) {
1374         rbitset_set(bl->avail_out, op->value.id);
1375         bl->id_2_memop_avail[op->value.id] = op;
1376 }  /* add_memop_avail */
1377
1378 /**
1379  * Check if we can convert a value of one mode to another mode
1380  * without changing the representation of bits.
1381  *
1382  * @param from  the original mode
1383  * @param to    the destination mode
1384  */
1385 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
1386         if (get_mode_arithmetic(from) == irma_twos_complement &&
1387             get_mode_arithmetic(to) == irma_twos_complement &&
1388             get_mode_size_bits(from) == get_mode_size_bits(to))
1389                 return 1;
1390         return 0;
1391 }  /* can_convert_to */
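
/*
 * Illustrative example (not part of the original file, assuming the default
 * libFirm mode sizes): two integer modes of equal width with two's
 * complement arithmetic are interchangeable here, anything else is not:
 *
 *   can_convert_to(mode_Iu, mode_Is);   // 1: same width, signed vs. unsigned
 *   can_convert_to(mode_Is, mode_Ls);   // 0: different bit width
 *   can_convert_to(mode_Is, mode_F);    // 0: float is not two's complement
 */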
1392
1393 /**
1394  * Add a Conv to the requested mode if needed.
1395  *
1396  * @param irn   the IR-node to convert
1397  * @param mode  the destination mode
1398  *
1399  * @return the possible converted node or NULL
1400  *         if the conversion is not possible
1401  */
1402 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
1403         ir_mode *other = get_irn_mode(irn);
1404         if (other != mode) {
1405                 /* different modes: check if conversion is possible without changing the bits */
1406                 if (can_convert_to(other, mode)) {
1407                         ir_node *block = get_nodes_block(irn);
1408                         return new_r_Conv(current_ir_graph, block, irn, mode);
1409                 }
1410                 /* otherwise not possible ... yet */
1411                 return NULL;
1412         }
1413         return irn;
1414 }  /* conv_to */
1415
1416 /**
1417  * Update the address of a value if that address was a Load result
1418  * and that Load has been killed.
1419  *
1420  * @param value  the value whose address is updated
1421  */
1422 static void update_address(value_t *value) {
1423         if (is_Proj(value->address)) {
1424                 ir_node *load = get_Proj_pred(value->address);
1425
1426                 if (is_Load(load)) {
1427                         const memop_t *op = get_irn_memop(load);
1428
1429                         if (op->flags & FLAG_KILLED_NODE)
1430                                 value->address = op->replace;
1431                 }
1432         }
1433 }  /* update_address */
1434
1435 /**
1436  * Do forward dataflow analysis on the given block and calculate the
1437  * GEN and KILL in the current (avail) set.
1438  *
1439  * @param bl  the block
1440  */
1441 static void calc_gen_kill_avail(block_t *bl) {
1442         memop_t *op;
1443         ir_node *def;
1444
1445         for (op = bl->memop_forward; op != NULL; op = op->next) {
1446                 switch (get_irn_opcode(op->node)) {
1447                 case iro_Phi:
1448                         /* meet */
1449                         break;
1450                 case iro_Sync:
1451                         /* join */
1452                         break;
1453                 case iro_Load:
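                        /* A Load whose address already carries a value in the current set
                         * (stored earlier: RAW, or loaded earlier: RAR) is marked for
                         * replacement by that value (possibly behind a Conv); otherwise
                         * its own result becomes available. */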
1454                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1455                                 /* do we have this already? */
1456                                 memop_t *other;
1457
1458                                 update_address(&op->value);
1459                                 other = find_address(&op->value);
1460                                 if (other != NULL && other != op) {
1461                                         def = conv_to(other->value.value, op->value.mode);
1462                                         if (def != NULL) {
1463 #ifdef DEBUG_libfirm
1464                                                 if (is_Store(other->node)) {
1465                                                         /* RAW */
1466                                                         DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
1467                                                 } else {
1468                                                         /* RAR */
1469                                                         DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
1470                                                 }
1471 #endif
1472                                                 mark_replace_load(op, def);
1473                                                 /* do NOT change the memop table */
1474                                                 continue;
1475                                         }
1476                                 }
1477                                 /* add this value */
1478                                 add_memop(op);
1479                         }
1480                         break;
1481                 case iro_Store:
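                        /* A Store kills every tracked value that may alias its address and
                         * then makes its own value available, so later Loads can be forwarded
                         * from it.  A Store of a value that is already known to be at that
                         * address is removed (WAR), and of two Stores to the same address in
                         * one block the earlier one is killed (WAW). */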
1482                         if (! (op->flags & FLAG_KILLED_NODE)) {
1483                                 /* do we have this store already */
1484                                 memop_t *other;
1485
1486                                 update_address(&op->value);
1487                                 other = find_address(&op->value);
1488                                 if (other != NULL) {
1489                                         if (is_Store(other->node)) {
1490                                                 if (op != other && !(other->flags & FLAG_IGNORE) &&
1491                                                     get_nodes_block(other->node) == get_nodes_block(op->node)) {
1492                                                         /*
1493                                                          * For a WAW in the same block we can kill the first Store.
1494                                                          * This is a shortcut: we know that the second Store will be
1495                                                          * anticipated then in any case.
1496                                                          */
1497                                                         DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", other->node, op->node));
1498                                                         mark_remove_store(other);
1499                                                         /* FIXME: a Load might get freed due to this killed Store */
1500                                                 }
1501                                         } else if (other->value.value == op->value.value && !(op->flags & FLAG_IGNORE)) {
1502                                                 /* WAR */
1503                                                 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1504                                                 mark_remove_store(op);
1505                                                 /* do NOT change the memop table */
1506                                                 continue;
1507                                         }
1508                                 }
1509                                 /* KILL all possible aliases */
1510                                 kill_memops(&op->value);
1511                                 /* add this value */
1512                                 add_memop(op);
1513                         }
1514                         break;
1515                 default:
1516                         if (op->flags & FLAG_KILL_ALL)
1517                                 kill_all();
1518                 }
1519         }
1520 }  /* calc_gen_kill_avail */
1521
1522 #define BYTE_SIZE(x)  (((x) + 7) >> 3)
1523
1524 /**
1525  * Do forward dataflow analysis on a given block to calculate the avail_out set
1526  * for this block only.
1527  *
1528  * @param bl  the block
1529  */
1530 static void forward_avail(block_t *bl) {
1531         /* fill the data from the current block */
1532         env.curr_id_2_memop = bl->id_2_memop_avail;
1533         env.curr_set        = bl->avail_out;
1534
1535         calc_gen_kill_avail(bl);
1536         dump_curr(bl, "Avail_out");
1537 }  /* forward_avail */
1538
1539 /**
1540  * Do backward dataflow analysis on a given block to calculate the antic set
1541  * of Loaded addresses.
1542  *
1543  * @param bl  the block
1544  *
1545  * @return non-zero if the set has changed since the last iteration
1546  */
1547 static int backward_antic(block_t *bl) {
1548         memop_t *op;
1549         ir_node *block = bl->block;
1550         int     n = get_Block_n_cfg_outs(block);
1551
1552         if (n == 1) {
1553                 ir_node  *succ    = get_Block_cfg_out(block, 0);
1554                 block_t  *succ_bl = get_block_entry(succ);
1555                 int      pred_pos = get_Block_cfgpred_pos(succ, block);
1556                 unsigned end      = env.rbs_size - 1;
1557                 unsigned pos;
1558
1559                 kill_all();
1560
1561                 if (bl->trans_results == NULL) {
1562                         /* allocate the translate cache */
1563                         unsigned size = env.curr_adr_id * sizeof(bl->trans_results[0]);
1564                         bl->trans_results = obstack_alloc(&env.obst, size);
1565                         memset(bl->trans_results, 0, size);
1566                 }
1567
1568                 /* translate the successor's anticipated values into this block */
1569                 for (pos = rbitset_next(succ_bl->anticL_in, 0, 1);
1570                      pos < end;
1571                      pos = rbitset_next(succ_bl->anticL_in, pos + 1, 1)) {
1572                         /*
1573                          * do Phi-translation here: Note that at this point the nodes are
1574                          * not changed, so we can safely cache the results.
1575                          * However: Loads of Load results ARE bad, because we have no way
1576                          * to translate them yet ...
1577                          */
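                        /* Roughly what phi_translate() does: if the address is a Phi placed
                         * in succ, e.g. Phi(a0, a1), moving it along predecessor pred_pos
                         * selects the matching operand; the translated address then gets
                         * its own id via register_address(). */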
1578                         memop_t *op = bl->trans_results[pos];
1579                         if (op == NULL) {
1580                                 /* not yet translated */
1581                                 ir_node *adr, *trans_adr;
1582
1583                                 op  = succ_bl->id_2_memop_antic[pos];
1584                                 adr = op->value.address;
1585
1586                                 trans_adr = phi_translate(adr, succ, pred_pos);
1587                                 if (trans_adr != adr) {
1588                                         /* create a new entry for the translated one */
1589                                         memop_t *new_op;
1590
1591                                         new_op = alloc_memop(NULL);
1592                                         new_op->value.address = trans_adr;
1593                                         new_op->value.id      = register_address(trans_adr);
1594                                         new_op->value.mode    = op->value.mode;
1595                                         new_op->node          = op->node; /* we need the node to decide if Load/Store */
1596                                         new_op->flags         = op->flags;
1597
1598                                         bl->trans_results[pos] = new_op;
1599                                         op = new_op;
1600                                 }
1601                         }
1602                         env.curr_id_2_memop[op->value.id] = op;
1603                         rbitset_set(env.curr_set, op->value.id);
1604                 }
1605         } else if (n > 1) {
1606                 ir_node *succ    = get_Block_cfg_out(block, 0);
1607                 block_t *succ_bl = get_block_entry(succ);
1608                 int i;
1609
1610                 rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1611                 memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1612
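                /* The AnticL_out of this block is the intersection of the successors'
                 * anticL_in sets: a Load is anticipated here only if it is anticipated
                 * along every outgoing path. */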
1613                 /* Hmm: probably we want to kill merges of Loads and Stores here */
1614                 for (i = n - 1; i > 0; --i) {
1615                         ir_node *succ    = get_Block_cfg_out(bl->block, i);
1616                         block_t *succ_bl = get_block_entry(succ);
1617
1618                         rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1619                 }
1620         } else {
1621                 /* block ends with a noreturn call */
1622                 kill_all();
1623         }
1624
1625         dump_curr(bl, "AnticL_out");
1626
1627         for (op = bl->memop_backward; op != NULL; op = op->prev) {
1628                 switch (get_irn_opcode(op->node)) {
1629                 case iro_Phi:
1630                         /* meet */
1631                         break;
1632                 case iro_Sync:
1633                         /* join */
1634                         break;
1635                 case iro_Load:
1636                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1637                                 /* always add it */
1638                                 add_memop(op);
1639                         }
1640                         break;
1641                 case iro_Store:
1642                         if (! (op->flags & FLAG_KILLED_NODE)) {
1643                                 /* a Store: check which memops must be killed */
1644                                 kill_memops(&op->value);
1645                         }
1646                         break;
1647                 default:
1648                         if (op->flags & FLAG_KILL_ALL)
1649                                 kill_all();
1650                 }
1651         }
1652
1653         memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1654         if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1655                 /* changed */
1656                 rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
1657                 dump_curr(bl, "AnticL_in*");
1658                 return 1;
1659         }
1660         dump_curr(bl, "AnticL_in");
1661         return 0;
1662 }  /* backward_antic */
1663
1664 /**
1665  * Replace a Load memop by an already known value.
1666  *
1667  * @param op  the Load memop
1668  */
1669 static void replace_load(memop_t *op) {
1670         ir_node *load = op->node;
1671         ir_node *def  = skip_Id(op->replace);
1672         ir_node *proj;
1673         ir_mode *mode;
1674
1675         if (def != NULL)
1676                 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1677         else {
1678                 if (op->flags & FLAG_EXCEPTION) {
1679                         /* bad: this node is unused and executed for exception only */
1680                         DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1681                         return;
1682                 }
1683                 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1684         }
1685
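        /* Reroute the Load's Projs: memory users get the Load's incoming memory,
         * the result Proj is replaced by the new definition (behind a Conv if the
         * modes differ), the exception Proj becomes Bad and the X_regular Proj
         * becomes a plain Jmp. */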
1686         if (op->mem != NULL) {
1687                 /* in rare cases a Load might have NO memory Proj */
1688                 exchange(op->mem, get_Load_mem(load));
1689         }
1690         proj = op->projs[pn_Load_res];
1691         if (proj != NULL) {
1692                 mode = get_irn_mode(proj);
1693                 if (get_irn_mode(def) != mode) {
1694                         /* a hidden cast */
1695                         dbg_info *db    = get_irn_dbg_info(load);
1696                         ir_node  *block = get_nodes_block(proj);
1697                         def = new_rd_Conv(db, current_ir_graph, block, def, mode);
1698                 }
1699                 exchange(proj, def);
1700         }
1701         proj = op->projs[pn_Load_X_except];
1702         if (proj != NULL) {
1703                 exchange(proj, new_Bad());
1704         }
1705         proj = op->projs[pn_Load_X_regular];
1706         if (proj != NULL) {
1707                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1708         }
1709 }  /* replace_load */
1710
1711 /**
1712  * Remove a Store memop.
1713  *
1714  * @param op  the Store memop
1715  */
1716 static void remove_store(memop_t *op) {
1717         ir_node *store = op->node;
1718         ir_node *proj;
1719
1720         DB((dbg, LEVEL_1, "Removing %+F\n", store));
1721
1722         if (op->mem != NULL) {
1723                 /* in rare cases a Store might have no memory Proj */
1724                 exchange(op->mem, get_Store_mem(store));
1725         }
1726         proj = op->projs[pn_Store_X_except];
1727         if (proj != NULL) {
1728                 exchange(proj, new_Bad());
1729         }
1730         proj = op->projs[pn_Store_X_regular];
1731         if (proj != NULL) {
1732                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
1733         }
1734 }  /* remove_store */
1735
1736
1737 /**
1738  * Do all necessary replacements for a given block.
1739  *
1740  * @param bl  the block
1741  */
1742 static void do_replacements(block_t *bl) {
1743         memop_t *op;
1744
1745         for (op = bl->memop_forward; op != NULL; op = op->next) {
1746                 if (op->flags & FLAG_KILLED_NODE) {
1747                         switch (get_irn_opcode(op->node)) {
1748                         case iro_Load:
1749                                 replace_load(op);
1750                                 break;
1751                         case iro_Store:
1752                                 remove_store(op);
1753                                 break;
1754                         }
1755                 }
1756         }
1757 }  /* do_replacements */
1758
1759 /**
1760  * Calculate the Avail_out sets for all basic blocks.
1761  */
1762 static void calcAvail(void) {
1763         memop_t  **tmp_memop = env.curr_id_2_memop;
1764         unsigned *tmp_set    = env.curr_set;
1765         block_t  *bl;
1766
1767         /* calculate avail_out */
1768         DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1769
1770         /* iterate over all blocks in any order, skipping the start block */
1771         for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1772                 forward_avail(bl);
1773         }
1774
1775         /* restore the current sets */
1776         env.curr_id_2_memop = tmp_memop;
1777         env.curr_set        = tmp_set;
1778 }  /* calcAvail */
1779
1780 /**
1781  * Calculate the Antic_in sets for all basic blocks.
1782  */
1783 static void calcAntic(void) {
1784         int i, need_iter;
1785
1786         /* calculate antic_in */
1787         DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1788         i = 0;
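        /* standard backward fixed-point iteration: repeat until no block's
         * anticL_in set changes anymore */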
1789         do {
1790                 block_t *bl;
1791
1792                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1793
1794                 need_iter = 0;
1795
1796                 /* over all blocks in reverse post order */
1797                 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1798                         need_iter |= backward_antic(bl);
1799                 }
1800                 ++i;
1801         } while (need_iter);
1802         DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1803 }  /* calcAntic */
1804
1805 /**
1806  * Return the node representing the last memory in a block.
1807  *
1808  * @param bl  the block
1809  */
1810 static ir_node *find_last_memory(block_t *bl) {
1811         for (;;) {
1812                 if (bl->memop_backward != NULL) {
1813                         return bl->memop_backward->mem;
1814                 }
1815                 /* if there is NO memory in this block, go to the dominator */
1816                 bl = get_block_entry(get_Block_idom(bl->block));
1817         }
1818 }  /* find_last_memory */
1819
1820 /**
1821  * Reroute all memory users of old memory
1822  * to a new memory IR-node.
1823  *
1824  * @param omem  the old memory IR-node
1825  * @param nmem  the new memory IR-node
1826  */
1827 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1828         int i;
1829
1830         for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1831                 int     n_pos;
1832                 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1833
1834                 set_irn_n(user, n_pos, nmem);
1835         }
1836
1837         /* all edges that previously pointed to omem now point to nmem */
1838         nmem->out = omem->out;
1839 }  /* reroute_all_mem_users */
1840
1841 /**
1842  * Reroute memory users of old memory that are dominated by a given block
1843  * to a new memory IR-node.
1844  *
1845  * @param omem     the old memory IR-node
1846  * @param nmem     the new memory IR-node
1847  * @param pass_bl  the block the memory must pass
1848  */
1849 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1850         int             i, j, n = get_irn_n_outs(omem);
1851         ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1852
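        /* n + 1 entries: index 0 will hold the number of rerouted edges, the
         * dominated users are stored from index 1 onwards. */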
1853         for (i = j = 0; i < n; ++i) {
1854                 int     n_pos;
1855                 ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
1856                 ir_node *use_bl = get_nodes_block(user);
1857
1858
1859                 if (is_Phi(user)) {
1860                         use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1861                 }
1862                 if (block_dominates(pass_bl, use_bl)) {
1863                         /* found a user that is dominated */
1864                         ++j;
1865                         edges[j].pos = n_pos;
1866                         edges[j].use = user;
1867
1868                         set_irn_n(user, n_pos, nmem);
1869                 }
1870         }
1871
1872         /* Modify the out structure: we create a new out edge array on our
1873            temporary obstack here. This should be no problem, as we invalidate the
1874            out edges at the end anyway. */
1875         /* first entry is used for the length */
1876         edges[0].pos = j;
1877         nmem->out = edges;
1878 }  /* reroute_mem_through */
1879
1880 /**
1881  * insert Loads, making partly redundant Loads fully redundant
1882  */
1883 static int insert_Load(block_t *bl) {
1884         ir_node  *block = bl->block;
1885         int      i, n = get_Block_n_cfgpreds(block);
1886         unsigned end = env.rbs_size - 1;
1887         unsigned pos;
1888
1889         DB((dbg, LEVEL_3, "processing %+F\n", block));
1890
1891         if (n == 0) {
1892                 /* might still happen for an unreachable block (the End block, for instance) */
1893                 return 0;
1894         }
1895
1896         if (n > 1) {
1897                 ir_node **ins;
1898                 int     pos;
1899
1900                 NEW_ARR_A(ir_node *, ins, n);
1901
1902                 rbitset_set_all(env.curr_set, env.rbs_size);
1903
1904                 /* More than one predecessor: calculate the join over all avail_out sets, ignoring
1905                    unevaluated blocks; these contribute Top anyway. */
1906                 for (i = n - 1; i >= 0; --i) {
1907                         ir_node *pred = skip_Proj(get_Block_cfgpred(block, i));
1908                         ir_node *blk  = get_nodes_block(pred);
1909                         block_t *pred_bl;
1910
1911                         pred_bl = get_block_entry(blk);
1912                         rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
1913
1914                         if (is_Load(pred) || is_Store(pred)) {
1915                                 /* We reached this block via the exception edge of a Load or Store:
1916                                  * the memop raising the exception was NOT completed then, so kill its value
1917                                  */
1918                                 memop_t *exc_op = get_irn_memop(pred);
1919                                 rbitset_clear(env.curr_set, exc_op->value.id);
1920                         }
1921
1922                 }
1923                 /*
1924                  * Ensure that all values are in the map and build Phis where necessary.
1925                  * Note: the last bit is the sentinel and ALWAYS set, so start at env.rbs_size - 2.
1926                  */
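                /* If every predecessor provides the very same node, no Phi is needed;
                 * if a predecessor's value cannot be converted to the common mode, the
                 * value is dropped from the set. */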
1927                 for (pos = env.rbs_size - 2; pos >= 0; --pos) {
1928                         if (! rbitset_is_set(env.curr_set, pos))
1929                                 env.curr_id_2_memop[pos] = NULL;
1930                         else {
1931                                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1932                                 block_t *pred_bl = get_block_entry(pred);
1933                                 int     need_phi = 0;
1934                                 memop_t *first   = NULL;
1935                                 ir_mode *mode;
1936
1937                                 for (i = 0; i < n; ++i) {
1938                                         memop_t *mop;
1939
1940                                         pred    = get_Block_cfgpred_block(bl->block, i);
1941                                         pred_bl = get_block_entry(pred);
1942
1943                                         mop = pred_bl->id_2_memop_avail[pos];
1944                                         if (first == NULL) {
1945                                                 first = mop;
1946                                                 ins[0] = first->value.value;
1947                                                 mode = get_irn_mode(ins[0]);
1948
1949                                                 /* no Phi needed so far */
1950                                                 env.curr_id_2_memop[pos] = first;
1951                                         } else {
1952                                                 ins[i] = conv_to(mop->value.value, mode);
1953                                                 if (ins[i] != ins[0]) {
1954                                                         if (ins[i] == NULL) {
1955                                                                 /* conversion failed */
1956                                                                 env.curr_id_2_memop[pos] = NULL;
1957                                                                 rbitset_clear(env.curr_set, pos);
1958                                                                 break;
1959                                                         }
1960                                                         need_phi = 1;
1961                                                 }
1962                                         }
1963                                 }
1964                                 if (need_phi) {
1965                                         /* build a Phi  */
1966                                         ir_node *phi = new_r_Phi(current_ir_graph, bl->block, n, ins, mode);
1967                                         memop_t *phiop = alloc_memop(phi);
1968
1969                                         phiop->value = first->value;
1970                                         phiop->value.value = phi;
1971
1972                                         /* no need to link it in, as it is a DATA phi */
1973
1974                                         env.curr_id_2_memop[pos] = phiop;
1975
1976                                         DB((dbg, LEVEL_3, "Created new %+F on merging value for address %+F\n", phi, first->value.address));
1977                                 }
1978                         }
1979                 }
1980         } else {
1981                 /* only one predecessor, simply copy the map */
1982                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1983                 block_t *pred_bl = get_block_entry(pred);
1984
1985                 rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
1986
1987                 memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
1988         }
1989
1990         if (n > 1) {
1991                 /* check for partly redundant values */
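                /* For every Load that is anticipated at this block but not available on
                 * all paths, try to make it fully redundant: predecessors that already
                 * provide the value are reused, a new Load is inserted into each
                 * predecessor that lacks it, and the per-predecessor values are merged
                 * with a Phi. */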
1992                 for (pos = rbitset_next(bl->anticL_in, 0, 1);
1993                      pos < end;
1994                      pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
1995                         memop_t *op = bl->id_2_memop_antic[pos];
1996                         int     have_some, all_same;
1997                         ir_node *first;
1998
1999                         if (rbitset_is_set(env.curr_set, pos)) {
2000                                 /* already avail */
2001                                 continue;
2002                         }
2003
2004                         assert(is_Load(op->node));
2005
2006                         DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
2007
2008                         have_some  = 0;
2009                         all_same   = 1;
2010                         first      = NULL;
2011                         for (i = n - 1; i >= 0; --i) {
2012                                 ir_node *pred    = get_Block_cfgpred_block(block, i);
2013                                 block_t *pred_bl = get_block_entry(pred);
2014                                 ir_mode *mode    = op->value.mode;
2015                                 memop_t *e;
2016                                 ir_node *adr;
2017
2018                                 adr = phi_translate(op->value.address, block, i);
2019                                 DB((dbg, LEVEL_3, ".. using address %+F in pred %d\n", adr, i));
2020                                 e   = find_address_avail(pred_bl, register_address(adr), mode);
2021                                 if (e == NULL) {
2022                                         ir_node *ef_block = get_nodes_block(adr);
2023                                         if (! block_dominates(ef_block, pred)) {
2024                                                 /* cannot place a copy here */
2025                                                 have_some = 0;
2026                                                 DB((dbg, LEVEL_3, "%+F cannot be moved into predecessor %+F\n", op->node, pred));
2027                                                 break;
2028                                         }
2029                                         DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
2030                                         pred_bl->avail = NULL;
2031                                         all_same       = 0;
2032                                 } else {
2033                                         if (e->value.mode != mode && !can_convert_to(e->value.mode, mode)) {
2034                                                 /* cannot create a Phi due to different modes */
2035                                                 have_some = 0;
2036                                                 break;
2037                                         }
2038                                         pred_bl->avail = e;
2039                                         have_some      = 1;
2040                                         DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
2041                                         if (first == NULL)
2042                                                 first = e->node;
2043                                         else if (first != e->node)
2044                                                 all_same = 0;
2045                                 }
2046                         }
2047                         if (have_some && !all_same) {
2048                                 ir_mode *mode = op->value.mode;
2049                                 ir_node **in, *phi;
2050                                 memop_t *phi_op;
2051
2052                                 NEW_ARR_A(ir_node *, in, n);
2053
2054                                 for (i = n - 1; i >= 0; --i) {
2055                                         ir_node *pred    = get_Block_cfgpred_block(block, i);
2056                                         block_t *pred_bl = get_block_entry(pred);
2057
2058                                         if (pred_bl->avail == NULL) {
2059                                                 /* create a new Load here to make the Load fully redundant */
2060                                                 dbg_info *db       = get_irn_dbg_info(op->node);
2061                                                 ir_node  *last_mem = find_last_memory(pred_bl);
2062                                                 ir_node  *load, *def, *adr;
2063                                                 memop_t  *new_op;
2064
2065                                                 assert(last_mem != NULL);
2066                                                 adr  = phi_translate(op->value.address, block, i);
2067                                                 load = new_rd_Load(db, current_ir_graph, pred, last_mem, adr, mode, cons_none);
2068                                                 def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
2069                                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));
2070
2071                                                 new_op                = alloc_memop(load);
2072                                                 new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
2073                                                 new_op->value.address = adr;
2074                                                 new_op->value.id      = op->value.id;
2075                                                 new_op->value.mode    = mode;
2076                                                 new_op->value.value   = def;
2077
2078                                                 new_op->projs[pn_Load_M]   = new_op->mem;
2079                                                 new_op->projs[pn_Load_res] = def;
2080
2081                                                 new_op->prev = pred_bl->memop_backward;
2082                                                 if (pred_bl->memop_backward != NULL)
2083                                                         pred_bl->memop_backward->next = new_op;
2084
2085                                                 pred_bl->memop_backward = new_op;
2086
2087                                                 if (pred_bl->memop_forward == NULL)
2088                                                         pred_bl->memop_forward = new_op;
2089
2090                                                 if (get_nodes_block(last_mem) == pred) {
2091                                                         /* We have added a new last memory op in the pred block.
2092                                                            The old last memory was defined in pred as well, so reroute
2093                                                            all its memory users to the new one. */
2094                                                         reroute_all_mem_users(last_mem, new_op->mem);
2095                                                 } else {
2096                                                         /* reroute only the memory users that go through the pred block */
2097                                                         reroute_mem_through(last_mem, new_op->mem, pred);
2098                                                 }
2099
2100                                                 /* we added this load at the end, so it will be avail anyway */
2101                                                 add_memop_avail(pred_bl, new_op);
2102                                                 pred_bl->avail = new_op;
2103                                         }
2104                                         in[i] = conv_to(pred_bl->avail->value.value, mode);
2105                                 }
2106                                 phi = new_r_Phi(current_ir_graph, block, n, in, mode);
2107                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));
2108
2109                                 phi_op = clone_memop_phi(op, phi);
2110                                 add_memop(phi_op);
2111                         }
2112                 }
2113         }
2114
2115         /* recalculate avail by gen and kill */
2116         calc_gen_kill_avail(bl);
2117
2118         /* always update the map after gen/kill, as values might have been changed due to RAR/WAR/WAW */
2119         memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2120
2121         if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
2122                 /* the avail set has changed */
2123                 rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
2124                 dump_curr(bl, "Avail_out*");
2125                 return 1;
2126         }
2127         dump_curr(bl, "Avail_out");
2128         return 0;
2129 }  /* insert_Load */
2130
2131 /**
2132  * Insert Loads upwards.
2133  */
2134 static void insert_Loads_upwards(void) {
2135         int i, need_iter;
2136         block_t *bl;
2137
2138         /* recalculate antic_out and insert Loads */
2139         DB((dbg, LEVEL_2, "Inserting Loads\n"));
2140
2141         i = 0;
2142         do {
2143                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
2144
2145                 need_iter = 0;
2146
2147                 /* over all blocks in reverse post order, skip the start block */
2148                 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
2149                         need_iter |= insert_Load(bl);
2150                 }
2151                 ++i;
2152         } while (need_iter);
2153
2154         DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
2155 }  /* insert_Loads_upwards */
2156
2157 /**
2158  * Kill unreachable control flow.
2159  *
2160  * @param irg  the graph to operate on
2161  */
2162 static void kill_unreachable_blocks(ir_graph *irg) {
2163         block_t *bl;
2164         ir_node **ins;
2165         int     changed = 0;
2166
2167         NEW_ARR_A(ir_node *, ins, env.max_cfg_preds);
2168
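        /* Only reachable blocks should carry the Block mark here (it was set when
         * the reachable blocks were collected); a control-flow predecessor whose
         * block lacks the mark is unreachable and its edge is dropped. */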
2169         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2170                 ir_node *block = bl->block;
2171                 int     i, j, k, n;
2172
2173                 assert(get_Block_mark(block));
2174
2175                 n = get_Block_n_cfgpreds(block);
2176
2177                 for (i = j = 0; i < n; ++i) {
2178                         ir_node *pred = get_Block_cfgpred(block, i);
2179                         ir_node *pred_bl;
2180
2181                         if (is_Bad(pred))
2182                                 continue;
2183
2184                         pred_bl = get_nodes_block(skip_Proj(pred));
2185                         if (! get_Block_mark(pred_bl))
2186                                 continue;
2187
2188                         ins[j++] = pred;
2189                 }
2190                 if (j != n) {
2191                         ir_node *phi, *next;
2192
2193                         /* some unreachable blocks detected */
2194                         changed = 1;
2195
2196                         DB((dbg, LEVEL_1, "Killing dead block predecessors on %+F\n", block));
2197
2198                         set_irn_in(block, j, ins);
2199
2200                         /* shorten all Phi nodes */
2201                         for (phi = get_Block_phis(block); phi != NULL; phi = next) {
2202                                 next = get_Phi_next(phi);
2203
2204                                 for (i = k = 0; i < n; ++i) {
2205                                         ir_node *pred = get_Block_cfgpred_block(block, i);
2206
2207                                         if (is_Bad(pred))
2208                                                 continue;
2209
2210                                         if (! get_Block_mark(pred))
2211                                                 continue;
2212
2213                                         ins[k++] = get_Phi_pred(phi, i);
2214                                 }
2215                                 if (k == 1)
2216                                         exchange(phi, ins[0]);
2217                                 else
2218                                         set_irn_in(phi, k, ins);
2219                         }
2220                 }
2221
2222         }
2223
2224         if (changed) {
2225                 /* kick keep alives */
2226                 ir_node *end = get_irg_end(irg);
2227                 int     i, j, n = get_End_n_keepalives(end);
2228
2229                 NEW_ARR_A(ir_node *, ins, n);
2230
2231                 for (i = j = 0; i < n; ++i) {
2232                         ir_node *ka = get_End_keepalive(end, i);
2233                         ir_node *ka_bl;
2234
2235                         if (is_Bad(ka))
2236                                 continue;
2237                         if (is_Block(ka))
2238                                 ka_bl = ka;
2239                         else
2240                                 ka_bl = get_nodes_block(skip_Proj(ka));
2241                         if (get_Block_mark(ka_bl))
2242                                 ins[j++] = ka;
2243                 }
2244                 if (j != n)
2245                         set_End_keepalives(end, j, ins);
2246
2247                 free_irg_outs(irg);
2248
2249                 /* this transformation does NOT invalidate the dominance information */
2250         }
2251 }  /* kill_unreachable_blocks */
2252
2253 int opt_ldst(ir_graph *irg) {
2254         block_t  *bl;
2255         ir_graph *rem = current_ir_graph;
2256
2257         current_ir_graph = irg;
2258
2259         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
2260 //      firm_dbg_set_mask(dbg, -1);
2261
2262         DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
2263
2264         /* we need landing pads */
2265         remove_critical_cf_edges(irg);
2266
2267 //      dump_ir_block_graph(irg, "-XXX");
2268
2269         if (get_opt_alias_analysis()) {
2270                 assure_irg_entity_usage_computed(irg);
2271                 assure_irp_globals_entity_usage_computed();
2272         }
2273
2274         obstack_init(&env.obst);
2275         ir_nodemap_init(&env.adr_map);
2276
2277         env.forward       = NULL;
2278         env.backward      = NULL;
2279         env.curr_adr_id   = 0;
2280         env.n_mem_ops     = 0;
2281         env.max_cfg_preds = 0;
2282         env.changed       = 0;
2283         env.start_bl      = get_irg_start_block(irg);
2284         env.end_bl        = get_irg_end_block(irg);
2285 #ifdef DEBUG_libfirm
2286         env.id_2_address  = NEW_ARR_F(ir_node *, 0);
2287 #endif
2288
2289         assure_irg_outs(irg);
2290
2291         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2292
2293         /* first step: allocate block entries. Note that some blocks might be
2294            unreachable here. Using the normal walk ensures that ALL blocks are initialized. */
2295         irg_walk_graph(irg, prepare_blocks, link_phis, NULL);
2296
2297         /* produce an inverse post-order list for the CFG: this links only reachable
2298            blocks */
2299         irg_out_block_walk(get_irg_start_block(irg), NULL, inverse_post_order, NULL);
2300
2301         if (! get_Block_mark(env.end_bl)) {
2302                 /*
2303                  * The end block is NOT reachable due to endless loops
2304                  * or no_return calls.
2305                  * Place the end block last.
2306                  * env.backward points to the last block in the list for this purpose.
2307                  */
2308                 env.backward->forward_next = get_block_entry(env.end_bl);
2309
2310                 set_Block_mark(env.end_bl, 1);
2311         }
2312
2313         /* KILL unreachable blocks: these disturb the data flow analysis */
2314         kill_unreachable_blocks(irg);
2315
2316         assure_doms(irg);
2317
2318         /* second step: find and sort all memory ops */
2319         walk_memory_irg(irg, collect_memops, NULL, NULL);
2320
2321 #ifdef DEBUG_libfirm
2322         /* check that the backward map is correct */
2323         assert((unsigned)ARR_LEN(env.id_2_address) == env.curr_adr_id);
2324 #endif
2325
2326         if (env.n_mem_ops == 0) {
2327                 /* no memory ops */
2328                 goto end;
2329         }
2330
2331         /* create the backward links. */
2332         env.backward = NULL;
2333         irg_block_walk_graph(irg, NULL, collect_backward, NULL);
2334
2335         /* link the end block in */
2336         bl = get_block_entry(env.end_bl);
2337         bl->backward_next = env.backward;
2338         env.backward      = bl;
2339
2340         /* check that we really start with the start / end block */
2341         assert(env.forward->block  == env.start_bl);
2342         assert(env.backward->block == env.end_bl);
2343
2344         /* create address sets: for now, only the existing addresses are allowed plus one
2345            needed for the sentinel */
2346         env.rbs_size = env.curr_adr_id + 1;
2347
2348         /* create the current set */
2349         env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2350         rbitset_set(env.curr_set, env.rbs_size - 1);
2351         env.curr_id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2352         memset(env.curr_id_2_memop, 0, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2353
2354         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2355                 /* set sentinel bits */
2356                 bl->avail_out  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2357                 rbitset_set(bl->avail_out, env.rbs_size - 1);
2358
2359                 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2360                 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
2361
2362                 bl->anticL_in  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2363                 rbitset_set(bl->anticL_in, env.rbs_size - 1);
2364
2365                 bl->id_2_memop_antic = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2366                 memset(bl->id_2_memop_antic, 0, env.rbs_size * sizeof(bl->id_2_memop_antic[0]));
2367         }
2368
2369 //      dump_block_list(&env);
2370
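        /* The analysis phases: calcAvail() computes forward availability per block,
         * calcAntic() computes backward anticipability of Loads, and
         * insert_Loads_upwards() inserts Loads into predecessors to make partly
         * redundant Loads fully redundant. */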
2371         calcAvail();
2372         calcAntic();
2373
2374         insert_Loads_upwards();
2375
2376         if (env.changed) {
2377                 /* over all blocks in reverse post order */
2378                 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2379                         do_replacements(bl);
2380                 }
2381
2382                 /* Not only invalidate but free the out edges: we might have allocated new out
2383                    arrays on our obstack, which are freed together with the obstack below. */
2384                 free_irg_outs(irg);
2385                 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
2386         }
2387 end:
2388
2389         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2390         ir_nodemap_destroy(&env.adr_map);
2391         obstack_free(&env.obst, NULL);
2392
2393 //      dump_ir_block_graph(irg, "-YYY");
2394
2395 #ifdef DEBUG_libfirm
2396         DEL_ARR_F(env.id_2_address);
2397 #endif
2398
2399         current_ir_graph = rem;
2400         return env.changed != 0;
2401 }  /* opt_ldst */