1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Dataflow driven Load/Store optimizations, uses some ideas from
23  *          VanDrunen's LEPRE
24  * @author  Michael Beck
25  * @version $Id$
26  */
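/*
 * Rough sketch of the approach, as far as it is implemented below: every
 * Load/Store address gets a small integer id (register_address()), each
 * block collects its memory operations in order (collect_memops()), and
 * bitsets indexed by those ids describe which addressed values are
 * available respectively anticipated per block.  Redundant Loads are then
 * replaced (mark_replace_load()) and superfluous Stores removed
 * (mark_remove_store()).
 */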
27 #include "config.h"
28
29 #include "irnode_t.h"
30 #include "irflag_t.h"
31 #include "array_t.h"
32 #include "ircons.h"
33 #include "irdom.h"
34 #include "irgmod.h"
35 #include "irgwalk.h"
36 #include "irouts.h"
37 #include "irgraph.h"
38 #include "irgopt.h"
39 #include "iropt.h"
40 #include "iroptimize.h"
41 #include "irnodemap.h"
42 #include "raw_bitset.h"
43 #include "debug.h"
44 #include "error.h"
45 #include "irpass.h"
46
47 /* maximum number of output Proj's */
48 #define MAX_PROJ (pn_Load_max > pn_Store_max ? pn_Load_max : pn_Store_max)
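/* (this sizes memop_t::projs below, so one array can hold the Projs of
 * either a Load or a Store) */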
49
50 /**
51  * Maps an address to a dense ID.
52  */
53 typedef struct address_entry_t {
54         unsigned id;          /**< The ID */
55 } address_entry;
56
57 /**
58  * Memop-flags.
59  */
60 enum memop_flags {
61         FLAG_KILL_ALL    = 1, /**< KILL all addresses */
62         FLAG_KILLED_NODE = 2, /**< this node was killed */
63         FLAG_EXCEPTION   = 4, /**< this node has exception flow */
64         FLAG_IGNORE      = 8, /**< ignore this node (volatile or other) */
65 };
66
67 /**
68  * A value: This represents a value stored at a given address in
69  * memory. Do not confuse with values from value numbering.
70  */
71 typedef struct value_t value_t;
72 struct value_t {
73         ir_node  *address;    /**< the address of this value */
74         ir_node  *value;      /**< the value itself */
75         ir_mode  *mode;       /**< the mode of the value */
76         unsigned id;          /**< address id */
77 };
78
79 /**
80  * A memop describes a memory-related operation.
81  * These are Loads/Stores and all other ops that might modify
82  * memory (Calls, CopyB) or cause exceptions.
83  */
84 typedef struct memop_t memop_t;
85 struct memop_t {
86         value_t  value;      /**< the value of this memop: only defined for Load/Store */
87         ir_node  *node;      /**< the memory op itself */
88         ir_node  *mem;       /**< the memory FROM this node */
89         ir_node  *replace;   /**< the replacement node if this memop is replaced */
90         memop_t  *next;      /**< links to the next memory op in the block in forward order. */
91         memop_t  *prev;      /**< links to the previous memory op in the block in forward order. */
92         unsigned flags;      /**< memop flags */
93         ir_node  *projs[MAX_PROJ]; /**< Projs of this memory op */
94 };
95
96 /**
97  * Additional data for every basic block.
98  */
99 typedef struct block_t block_t;
100 struct block_t {
101         memop_t  *memop_forward;     /**< topologically sorted list of memory ops in this block */
102         memop_t  *memop_backward;    /**< last memop in the list */
103         unsigned *avail_out;         /**< out-set of available addresses */
104         memop_t  **id_2_memop_avail; /**< maps avail address ids to memops */
105         unsigned *anticL_in;         /**< in-set of anticipated Load addresses */
106         memop_t  **id_2_memop_antic; /**< maps anticipated address ids to memops */
107         ir_node  *block;             /**< the associated block */
108         block_t  *forward_next;      /**< next block entry for forward iteration */
109         block_t  *backward_next;     /**< next block entry for backward iteration */
110         memop_t  *avail;             /**< used locally for the avail map */
111         memop_t  **trans_results;    /**< used to cache translated nodes during the antic calculation. */
112 };
113
114 /**
115  * Metadata for this pass.
116  */
117 typedef struct ldst_env_t {
118         struct obstack  obst;              /**< obstack for temporary data */
119         ir_nodemap_t    adr_map;           /**< Maps addresses to address_entry structures. */
120         block_t         *forward;          /**< Inverse post-order list of all blocks Start->End */
121         block_t         *backward;         /**< Inverse post-order list of all blocks End->Start */
122         ir_node         *start_bl;         /**< start block of the current graph */
123         ir_node         *end_bl;           /**< end block of the current graph */
124         unsigned        *curr_set;         /**< current set of addresses */
125         memop_t         **curr_id_2_memop; /**< current map of address ids to memops */
126         unsigned        curr_adr_id;       /**< number for address mapping */
127         unsigned        n_mem_ops;         /**< number of memory operations (Loads/Stores) */
128         unsigned        rbs_size;          /**< size of all bitsets in bits */
129         int             max_cfg_preds;     /**< maximum number of block cfg predecessors */
130         int             changed;           /**< Flags for changed graph state */
131 #ifdef DEBUG_libfirm
132         ir_node         **id_2_address;    /**< maps an id to the used address */
133 #endif
134 } ldst_env;
135
136 /* the one and only environment */
137 static ldst_env env;
138
139 #ifdef DEBUG_libfirm
140
141 static firm_dbg_module_t *dbg;
142
143 /**
144  * Dumps the block list.
145  *
146  * @param env  the ldst environment
147  */
148 static void dump_block_list(ldst_env *env) {
149         block_t *entry;
150         memop_t *op;
151         int     i;
152
153         for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
154                 DB((dbg, LEVEL_2, "%+F {", entry->block));
155
156                 i = 0;
157                 for (op = entry->memop_forward; op != NULL; op = op->next) {
158                         if (i == 0) {
159                                 DB((dbg, LEVEL_2, "\n\t"));
160                         }
                            DB((dbg, LEVEL_2, "%+F", op->node));
161                         if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
162                                 DB((dbg, LEVEL_2, "X"));
163                         else if (op->flags & FLAG_KILL_ALL)
164                                 DB((dbg, LEVEL_2, "K"));
165                         DB((dbg, LEVEL_2, ", "));
166
167                         i = (i + 1) & 3;
168                 }
169                 DB((dbg, LEVEL_2, "\n}\n\n"));
170         }
171 }  /* dump_block_list */
172
173 /**
174  * Dumps the current set.
175  *
176  * @param bl   current block
177  * @param s    name of the set
178  */
179 static void dump_curr(block_t *bl, const char *s) {
180         unsigned end = env.rbs_size - 1;
181         unsigned pos;
182         int      i;
183
184         DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
185         i = 0;
186         for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
187                 memop_t *op = env.curr_id_2_memop[pos];
188
189                 if (i == 0) {
190                         DB((dbg, LEVEL_2, "\n\t"));
191                 }
192
193                 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
194                 i = (i + 1) & 3;
195         }
196         DB((dbg, LEVEL_2, "\n}\n"));
197 }  /* dump_curr */
198
199 #else
200 static void dump_block_list(ldst_env *env) {
201         (void) env;
202 }
203 static void dump_curr(block_t *bl, const char *s) {
204         (void) bl;
205         (void) s;
206 }
207 #endif /* DEBUG_libfirm */
208
209 /** Get the block entry for a block node */
210 static block_t *get_block_entry(const ir_node *block) {
211         assert(is_Block(block));
212
213         return get_irn_link(block);
214 }  /* get_block_entry */
215
216 /** Get the memop entry for a memory operation node */
217 static memop_t *get_irn_memop(const ir_node *irn) {
218         assert(! is_Block(irn));
219         return get_irn_link(irn);
220 }  /* get_irn_memop */
221
222 /**
223  * Walk over the memory edges from definition to users.
224  * This ensures that even operations without a memory output are found.
225  *
226  * @param irn   start node
227  * @param pre   pre walker function
228  * @param post  post walker function
229  * @param ctx   context parameter for the walker functions
230  */
231 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
232         int     i;
233         ir_mode *mode;
234
235         mark_irn_visited(irn);
236
237         if (pre)
238                 pre(irn, ctx);
239
240         mode = get_irn_mode(irn);
241         if (mode == mode_M) {
242                 /* every successor uses memory */
243                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
244                         ir_node *succ = get_irn_out(irn, i);
245
246                         if (! irn_visited(succ))
247                                 walk_memory(succ, pre, post, ctx);
248                 }
249         } else if (mode == mode_T) {
250                 /* only some Projs use memory */
251                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
252                         ir_node *proj = get_irn_out(irn, i);
253
254                         if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
255                                 walk_memory(proj, pre, post, ctx);
256                 }
257         }
258         if (post)
259                 post(irn, ctx);
260 }  /* walk_memory */
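/*
 * In effect this follows the memory dependency chain: starting from the
 * initial memory, every mode_M value leads to all of its users (Loads,
 * Stores, Calls, memory Phis, ...), and from mode_T nodes only the memory
 * Proj is followed.  Nodes that consume memory without producing any
 * (e.g. Return) are still reached, because they occur as users of some
 * mode_M value.
 */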
261
262 /**
263  * Walks over all memory nodes of a graph.
264  *
265  * @param irg   a graph
266  * @param pre   pre walker function
267  * @param post  post walker function
268  * @param ctx   context parameter for the walker functions
269  */
270 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
271         inc_irg_visited(irg);
272
273         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
274
275         /*
276          * there are two possible sources for memory: initial_mem and nomem
277          * we ignore nomem as this should NOT change the memory
278          */
279         walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
280
281         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
282 }  /* walk_memory_irg */
283
284 /**
285  * Register an address and allocate a (sparse, 0..n) ID for it.
286  *
287  * @param adr  the IR-node representing the address
288  *
289  * @return the allocated id
290  */
291 static unsigned register_address(ir_node *adr) {
292         address_entry *entry;
293
294         /* skip Confirms and Casts */
295 restart:
296         if (is_Confirm(adr)) {
297                 adr = get_Confirm_value(adr);
298                 goto restart;
299         }
300         if (is_Cast(adr)) {
301                 adr = get_Cast_op(adr);
302                 goto restart;
303         }
304
305         entry = ir_nodemap_get(&env.adr_map, adr);
306
307         if (entry == NULL) {
308                 /* new address */
309                 entry = obstack_alloc(&env.obst, sizeof(*entry));
310
311                 entry->id = env.curr_adr_id++;
312                 ir_nodemap_insert(&env.adr_map, adr, entry);
313
314                 DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
315 #ifdef DEBUG_libfirm
316                 ARR_APP1(ir_node *, env.id_2_address, adr);
317 #endif
318         }
319         return entry->id;
320 }  /* register_address */
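/*
 * Note: because Confirm and Cast nodes are skipped above, an address like
 * Cast(Confirm(p, ...)) is assigned the same id as the plain pointer p,
 * so both denote the same slot in the address bitsets used below.
 */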
321
322
323 /**
324  * translate an address through a Phi node into a given predecessor
325  * block.
326  *
327  * @param address  the address
328  * @param block    the block
329  * @param pos      the position of the predecessor in block
330  */
331 static ir_node *phi_translate(ir_node *address, const ir_node *block, int pos) {
332         if (is_Phi(address) && get_nodes_block(address) == block)
333                 address = get_Phi_pred(address, pos);
334         return address;
335 }  /* phi_translate */
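/*
 * Illustrative use (a0, a1 being hypothetical address nodes):
 *
 *   address = Phi(a0, a1) in block B
 *   phi_translate(address, B, 0) == a0
 *   phi_translate(address, B, 1) == a1
 *
 * Any address that is not a Phi of the given block is returned unchanged.
 */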
336
337 /**
338  * Walker: allocate a block entry for every block
339  * and register all potential addresses.
340  */
341 static void prepare_blocks(ir_node *irn, void *ctx) {
342         (void)ctx;
343
344         if (is_Block(irn)) {
345                 block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
346                 int     n;
347
348                 entry->memop_forward    = NULL;
349                 entry->memop_backward   = NULL;
350                 entry->avail_out        = NULL;
351                 entry->id_2_memop_avail = NULL;
352                 entry->anticL_in        = NULL;
353                 entry->id_2_memop_antic = NULL;
354                 entry->block            = irn;
355                 entry->forward_next     = NULL;
356                 entry->backward_next    = NULL;
357                 entry->avail            = NULL;
358                 entry->trans_results    = NULL;
359                 set_irn_link(irn, entry);
360
361                 set_Block_phis(irn, NULL);
362
363                 /* use block marks to track unreachable blocks */
364                 set_Block_mark(irn, 0);
365
366                 n = get_Block_n_cfgpreds(irn);
367                 if (n > env.max_cfg_preds)
368                         env.max_cfg_preds = n;
369         } else {
370                 ir_mode *mode = get_irn_mode(irn);
371
372                 if (mode_is_reference(mode)) {
373                         /*
374                          * Register ALL possible addresses: this is overkill yet, but
375                          * simpler than doing it for all possible translated addresses
376                          * (which would be sufficient at the moment).
377                          */
378                         (void)register_address(irn);
379                 }
380         }
381 }  /* prepare_blocks */
382
383 /**
384  * Post-Walker, link in all Phi's
385  */
386 static void link_phis(ir_node *irn, void *ctx) {
387         (void)ctx;
388
389         if (is_Phi(irn)) {
390                 ir_node *block = get_nodes_block(irn);
391                 add_Block_phi(block, irn);
392         }
393 }  /* link_phis */
394
395 /**
396  * Block walker: creates the inverse post-order list for the CFG.
397  */
398 static void inverse_post_order(ir_node *block, void *ctx) {
399         block_t *entry = get_block_entry(block);
400
401         (void)ctx;
402
403         /* mark this block IS reachable from start */
404         set_Block_mark(block, 1);
405
406         /* create the list in inverse order */
407         entry->forward_next = env.forward;
408         env.forward         = entry;
409
410         /* remember the first visited (last in list) entry, needed for later */
411         if (env.backward == NULL)
412                 env.backward = entry;
413 }  /* inverse_post_order */
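/*
 * Since every visited block is prepended to env.forward, the finished list
 * enumerates the blocks in the reverse of the visiting order; env.backward
 * keeps the first visited entry, matching the Start->End respectively
 * End->Start orders documented in ldst_env.
 */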
414
415 /**
416  * Block walker: create backward links for the memops of a block.
417  */
418 static void collect_backward(ir_node *block, void *ctx) {
419         block_t *entry = get_block_entry(block);
420         memop_t *last, *op;
421
422         (void)ctx;
423
424         /*
425          * Do NOT link in the end block yet. We want it to be
426          * the first in the list. This is NOT guaranteed by the walker
427          * if we have endless loops.
428          */
429         if (block != env.end_bl) {
430                 entry->backward_next = env.backward;
431
432                 /* create the list in inverse order */
433                 env.backward = entry;
434         }
435
436         /* create backward links for all memory ops */
437         last = NULL;
438         for (op = entry->memop_forward; op != NULL; op = op->next) {
439                 op->prev = last;
440                 last     = op;
441         }
442         entry->memop_backward = last;
443 }  /* collect_backward */
444
445 /**
446  * Allocate a memop.
447  *
448  * @param irn  the IR-node representing the memop or NULL
449  *             if this is a translated (virtual) memop
450  *
451  * @return the allocated memop
452  */
453 static memop_t *alloc_memop(ir_node *irn) {
454         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
455
456         m->value.address = NULL;
457         m->value.value   = NULL;
458         m->value.mode    = NULL;
459
460         m->node          = irn;
461         m->mem           = NULL;
462         m->replace       = NULL;
463         m->next          = NULL;
464         m->flags         = 0;
465
466         memset(m->projs, 0, sizeof(m->projs));
467
468         if (irn != NULL)
469                 set_irn_link(irn, m);
470         return m;
471 }  /* alloc_memop */
472
473 /**
474  * Create a memop for a Phi-replacement.
475  *
476  * @param op   the memop to clone
477  * @param phi  the Phi-node representing the new value
478  */
479 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
480         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
481
482         m->value         = op->value;
483         m->value.value   = phi;
484
485         m->node          = phi;
486         m->replace       = NULL;
487         m->next          = NULL;
488         m->flags         = 0;
489
490         set_irn_link(phi, m);
491         return m;
492 }  /* clone_memop_phi */
493
494 /**
495  * Return the memory properties of a call node.
496  *
497  * @param call  the call node
498  *
499  * @return a bitset of mtp_property_const and mtp_property_pure
500  */
501 static unsigned get_Call_memory_properties(ir_node *call) {
502         ir_type *call_tp = get_Call_type(call);
503         unsigned prop = get_method_additional_properties(call_tp);
504
505         /* check first the call type */
506         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
507                 /* try the called entity */
508                 ir_node *ptr = get_Call_ptr(call);
509
510                 if (is_Global(ptr)) {
511                         ir_entity *ent = get_Global_entity(ptr);
512
513                         prop = get_entity_additional_properties(ent);
514                 }
515         }
516         return prop & (mtp_property_const|mtp_property_pure);
517 }  /* get_Call_memory_properties */
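/*
 * The result is consumed by update_Call_memop() below: a const call is
 * treated as not touching memory at all, a pure call only reads memory,
 * and any other call kills all known addresses (FLAG_KILL_ALL).
 */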
518
519 /**
520  * Returns an entity if the address ptr points to a constant one.
521  *
522  * @param ptr  the address
523  *
524  * @return an entity or NULL
525  */
526 static ir_entity *find_constant_entity(ir_node *ptr) {
527         for (;;) {
528                 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
529                         return get_SymConst_entity(ptr);
530                 } else if (is_Sel(ptr)) {
531                         ir_entity *ent = get_Sel_entity(ptr);
532                         ir_type   *tp  = get_entity_owner(ent);
533
534                         /* Do not fiddle with polymorphism. */
535                         if (is_Class_type(get_entity_owner(ent)) &&
536                                 ((get_entity_n_overwrites(ent)    != 0) ||
537                                 (get_entity_n_overwrittenby(ent) != 0)   ) )
538                                 return NULL;
539
540                         if (is_Array_type(tp)) {
541                                 /* check bounds */
542                                 int i, n;
543
544                                 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
545                                         ir_node *bound;
546                                         tarval *tlower, *tupper;
547                                         ir_node *index = get_Sel_index(ptr, i);
548                                         tarval *tv     = computed_value(index);
549
550                                         /* check if the index is constant */
551                                         if (tv == tarval_bad)
552                                                 return NULL;
553
554                                         bound  = get_array_lower_bound(tp, i);
555                                         tlower = computed_value(bound);
556                                         bound  = get_array_upper_bound(tp, i);
557                                         tupper = computed_value(bound);
558
559                                         if (tlower == tarval_bad || tupper == tarval_bad)
560                                                 return NULL;
561
562                                         if (tarval_cmp(tv, tlower) & pn_Cmp_Lt)
563                                                 return NULL;
564                                         if (tarval_cmp(tupper, tv) & pn_Cmp_Lt)
565                                                 return NULL;
566
567                                         /* ok, bounds check finished */
568                                 }
569                         }
570
571                         if (variability_constant == get_entity_variability(ent))
572                                 return ent;
573
574                         /* try next */
575                         ptr = get_Sel_ptr(ptr);
576                 } else if (is_Add(ptr)) {
577                         ir_node *l = get_Add_left(ptr);
578                         ir_node *r = get_Add_right(ptr);
579
580                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
581                                 ptr = l;
582                         else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
583                                 ptr = r;
584                         else
585                                 return NULL;
586
587                         /* for now, we support only one addition, reassoc should fold all others */
588                         if (! is_SymConst(ptr) && !is_Sel(ptr))
589                                 return NULL;
590                 } else if (is_Sub(ptr)) {
591                         ir_node *l = get_Sub_left(ptr);
592                         ir_node *r = get_Sub_right(ptr);
593
594                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
595                                 ptr = l;
596                         else
597                                 return NULL;
598                         /* for now, we support only one subtraction, reassoc should fold all others */
599                         if (! is_SymConst(ptr) && !is_Sel(ptr))
600                                 return NULL;
601                 } else
602                         return NULL;
603         }
604 }  /* find_constant_entity */
605
606 /**
607  * Return the array index of the Sel node n for dimension dim.
608  */
609 static long get_Sel_array_index_long(ir_node *n, int dim) {
610         ir_node *index = get_Sel_index(n, dim);
611         assert(is_Const(index));
612         return get_tarval_long(get_Const_tarval(index));
613 }  /* get_Sel_array_index_long */
614
615 /**
616  * Returns the accessed component graph path for a
617  * node computing an address.
618  *
619  * @param ptr    the node computing the address
620  * @param depth  current depth in steps upward from the root
621  *               of the address
622  */
623 static compound_graph_path *rec_get_accessed_path(ir_node *ptr, int depth) {
624         compound_graph_path *res = NULL;
625         ir_entity           *root, *field, *ent;
626         int                 path_len, pos, idx;
627         tarval              *tv;
628         ir_type             *tp;
629
630         if (is_SymConst(ptr)) {
631                 /* a SymConst. If the depth is 0, this is an access to a global
632                  * entity and we don't need a component path, else we know
633                  * at least its length.
634                  */
635                 assert(get_SymConst_kind(ptr) == symconst_addr_ent);
636                 root = get_SymConst_entity(ptr);
637                 res = (depth == 0) ? NULL : new_compound_graph_path(get_entity_type(root), depth);
638         } else if (is_Sel(ptr)) {
639                 /* it's a Sel, go up until we find the root */
640                 res = rec_get_accessed_path(get_Sel_ptr(ptr), depth+1);
641                 if (res == NULL)
642                         return NULL;
643
644                 /* fill up the step in the path at the current position */
645                 field    = get_Sel_entity(ptr);
646                 path_len = get_compound_graph_path_length(res);
647                 pos      = path_len - depth - 1;
648                 set_compound_graph_path_node(res, pos, field);
649
650                 if (is_Array_type(get_entity_owner(field))) {
651                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
652                         set_compound_graph_path_array_index(res, pos, get_Sel_array_index_long(ptr, 0));
653                 }
654         } else if (is_Add(ptr)) {
655                 ir_node *l    = get_Add_left(ptr);
656                 ir_node *r    = get_Add_right(ptr);
657                 ir_mode *mode = get_irn_mode(ptr);
658                 tarval  *tmp;
659
660                 if (is_Const(r) && get_irn_mode(l) == mode) {
661                         ptr = l;
662                         tv  = get_Const_tarval(r);
663                 } else {
664                         ptr = r;
665                         tv  = get_Const_tarval(l);
666                 }
667 ptr_arith:
668                 mode = get_tarval_mode(tv);
669                 tmp  = tv;
670
671                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
672                 if (is_Sel(ptr)) {
673                         field = get_Sel_entity(ptr);
674                 } else {
675                         field = get_SymConst_entity(ptr);
676                 }
677                 idx = 0;
678                 for (ent = field;;) {
679                         unsigned size;
680                         tarval   *sz, *tv_index, *tlower, *tupper;
681                         ir_node  *bound;
682
683                         tp = get_entity_type(ent);
684                         if (! is_Array_type(tp))
685                                 break;
686                         ent = get_array_element_entity(tp);
687                         size = get_type_size_bytes(get_entity_type(ent));
688                         sz   = new_tarval_from_long(size, mode);
689
690                         tv_index = tarval_div(tmp, sz);
691                         tmp      = tarval_mod(tmp, sz);
692
693                         if (tv_index == tarval_bad || tmp == tarval_bad)
694                                 return NULL;
695
696                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
697                         bound  = get_array_lower_bound(tp, 0);
698                         tlower = computed_value(bound);
699                         bound  = get_array_upper_bound(tp, 0);
700                         tupper = computed_value(bound);
701
702                         if (tlower == tarval_bad || tupper == tarval_bad)
703                                 return NULL;
704
705                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
706                                 return NULL;
707                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
708                                 return NULL;
709
710                         /* ok, bounds check finished */
711                         ++idx;
712                 }
713                 if (! tarval_is_null(tmp)) {
714                         /* access to some struct/union member */
715                         return NULL;
716                 }
717
718                 /* should be at least ONE array */
719                 if (idx == 0)
720                         return NULL;
721
722                 res = rec_get_accessed_path(ptr, depth + idx);
723                 if (res == NULL)
724                         return NULL;
725
726                 path_len = get_compound_graph_path_length(res);
727                 pos      = path_len - depth - idx;
728
729                 for (ent = field;;) {
730                         unsigned size;
731                         tarval   *sz, *tv_index;
732                         long     index;
733
734                         tp = get_entity_type(ent);
735                         if (! is_Array_type(tp))
736                                 break;
737                         ent = get_array_element_entity(tp);
738                         set_compound_graph_path_node(res, pos, ent);
739
740                         size = get_type_size_bytes(get_entity_type(ent));
741                         sz   = new_tarval_from_long(size, mode);
742
743                         tv_index = tarval_div(tv, sz);
744                         tv       = tarval_mod(tv, sz);
745
746                         /* worked above, should work again */
747                         assert(tv_index != tarval_bad && tv != tarval_bad);
748
749                         /* bounds already checked above */
750                         index = get_tarval_long(tv_index);
751                         set_compound_graph_path_array_index(res, pos, index);
752                         ++pos;
753                 }
754         } else if (is_Sub(ptr)) {
755                 ir_node *l = get_Sub_left(ptr);
756                 ir_node *r = get_Sub_right(ptr);
757
758                 ptr = l;
759                 tv  = get_Const_tarval(r);
760                 tv  = tarval_neg(tv);
761                 goto ptr_arith;
762         }
763         return res;
764 }  /* rec_get_accessed_path */
765
766 /**
767  * Returns an access path or NULL.  The access path is only
768  * valid if the graph is in phase_high and _no_ address computation is used.
769  */
770 static compound_graph_path *get_accessed_path(ir_node *ptr) {
771         compound_graph_path *gr = rec_get_accessed_path(ptr, 0);
772         return gr;
773 }  /* get_accessed_path */
774
775 typedef struct path_entry {
776         ir_entity         *ent;
777         struct path_entry *next;
778         long              index;
779 } path_entry;
780
781 static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next) {
782         path_entry       entry, *p;
783         ir_entity        *ent, *field;
784         ir_initializer_t *initializer;
785         tarval           *tv;
786         ir_type          *tp;
787         unsigned         n;
788
789         entry.next = next;
790         if (is_SymConst(ptr)) {
791                 /* found the root */
792                 ent         = get_SymConst_entity(ptr);
793                 initializer = get_entity_initializer(ent);
794                 for (p = next; p != NULL;) {
795                         if (initializer->kind != IR_INITIALIZER_COMPOUND)
796                                 return NULL;
797                         n  = get_initializer_compound_n_entries(initializer);
798                         tp = get_entity_type(ent);
799
800                         if (is_Array_type(tp)) {
801                                 ent = get_array_element_entity(tp);
802                                 if (ent != p->ent) {
803                                         /* a missing [0] */
804                                         if (0 >= n)
805                                                 return NULL;
806                                         initializer = get_initializer_compound_value(initializer, 0);
807                                         continue;
808                                 }
809                         }
810                         if (p->index >= (int) n)
811                                 return NULL;
812                         initializer = get_initializer_compound_value(initializer, p->index);
813
814                         ent = p->ent;
815                         p   = p->next;
816                 }
817                 tp = get_entity_type(ent);
818                 while (is_Array_type(tp)) {
819                         ent = get_array_element_entity(tp);
820                         tp = get_entity_type(ent);
821                         /* a missing [0] */
822                         n  = get_initializer_compound_n_entries(initializer);
823                         if (0 >= n)
824                                 return NULL;
825                         initializer = get_initializer_compound_value(initializer, 0);
826                 }
827
828                 switch (initializer->kind) {
829                 case IR_INITIALIZER_CONST:
830                         return get_initializer_const_value(initializer);
831                 case IR_INITIALIZER_TARVAL:
832                 case IR_INITIALIZER_NULL:
833                 default:
834                         return NULL;
835                 }
836         } else if (is_Sel(ptr)) {
837                 entry.ent = field = get_Sel_entity(ptr);
838                 tp = get_entity_owner(field);
839                 if (is_Array_type(tp)) {
840                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
841                         entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
842                 } else {
843                         int i, n_members = get_compound_n_members(tp);
844                         for (i = 0; i < n_members; ++i) {
845                                 if (get_compound_member(tp, i) == field)
846                                         break;
847                         }
848                         if (i >= n_members) {
849                                 /* not found: should NOT happen */
850                                 return NULL;
851                         }
852                         entry.index = i;
853                 }
854                 return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
855         }  else if (is_Add(ptr)) {
856                 ir_node  *l = get_Add_left(ptr);
857                 ir_node  *r = get_Add_right(ptr);
858                 ir_mode  *mode;
859                 unsigned pos;
860
861                 if (is_Const(r)) {
862                         ptr = l;
863                         tv  = get_Const_tarval(r);
864                 } else {
865                         ptr = r;
866                         tv  = get_Const_tarval(l);
867                 }
868 ptr_arith:
869                 mode = get_tarval_mode(tv);
870
871                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
872                 if (is_Sel(ptr)) {
873                         field = get_Sel_entity(ptr);
874                 } else {
875                         field = get_SymConst_entity(ptr);
876                 }
877
878                 /* count needed entries */
879                 pos = 0;
880                 for (ent = field;;) {
881                         tp = get_entity_type(ent);
882                         if (! is_Array_type(tp))
883                                 break;
884                         ent = get_array_element_entity(tp);
885                         ++pos;
886                 }
887                 /* should be at least ONE entry */
888                 if (pos == 0)
889                         return NULL;
890
891                 /* allocate the right number of entries */
892                 NEW_ARR_A(path_entry, p, pos);
893
894                 /* fill them up */
895                 pos = 0;
896                 for (ent = field;;) {
897                         unsigned size;
898                         tarval   *sz, *tv_index, *tlower, *tupper;
899                         long     index;
900                         ir_node  *bound;
901
902                         tp = get_entity_type(ent);
903                         if (! is_Array_type(tp))
904                                 break;
905                         ent = get_array_element_entity(tp);
906                         p[pos].ent  = ent;
907                         p[pos].next = &p[pos + 1];
908
909                         size = get_type_size_bytes(get_entity_type(ent));
910                         sz   = new_tarval_from_long(size, mode);
911
912                         tv_index = tarval_div(tv, sz);
913                         tv       = tarval_mod(tv, sz);
914
915                         if (tv_index == tarval_bad || tv == tarval_bad)
916                                 return NULL;
917
918                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
919                         bound  = get_array_lower_bound(tp, 0);
920                         tlower = computed_value(bound);
921                         bound  = get_array_upper_bound(tp, 0);
922                         tupper = computed_value(bound);
923
924                         if (tlower == tarval_bad || tupper == tarval_bad)
925                                 return NULL;
926
927                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
928                                 return NULL;
929                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
930                                 return NULL;
931
932                         /* ok, bounds check finished */
933                         index = get_tarval_long(tv_index);
934                         p[pos].index = index;
935                         ++pos;
936                 }
937                 if (! tarval_is_null(tv)) {
938                         /* hmm, wrong access */
939                         return NULL;
940                 }
941                 p[pos - 1].next = next;
942                 return rec_find_compound_ent_value(ptr, p);
943         } else if (is_Sub(ptr)) {
944                 ir_node *l = get_Sub_left(ptr);
945                 ir_node *r = get_Sub_right(ptr);
946
947                 ptr = l;
948                 tv  = get_Const_tarval(r);
949                 tv  = tarval_neg(tv);
950                 goto ptr_arith;
951         }
952         return NULL;
953 }  /* rec_find_compound_ent_value */
954
955 static ir_node *find_compound_ent_value(ir_node *ptr) {
956         return rec_find_compound_ent_value(ptr, NULL);
957 }  /* find_compound_ent_value */
958
959 /**
960  * Mark a Load memop to be replaced by a definition.
961  *
962  * @param op  the Load memop
963  */
964 static void mark_replace_load(memop_t *op, ir_node *def) {
965         op->replace = def;
966         op->flags |= FLAG_KILLED_NODE;
967         env.changed = 1;
968 }  /* mark_replace_load */
969
970 /**
971  * Mark a Store memop to be removed.
972  *
973  * @param op  the Store memop
974  */
975 static void mark_remove_store(memop_t *op) {
976         op->flags |= FLAG_KILLED_NODE;
977         env.changed = 1;
978 }  /* mark_remove_store */
979
980 /**
981  * Update a memop for a Load.
982  *
983  * @param m  the memop
984  */
985 static void update_Load_memop(memop_t *m) {
986         int       i;
987         ir_node   *load = m->node;
988         ir_node   *ptr;
989         ir_entity *ent;
990
991         if (get_Load_volatility(load) == volatility_is_volatile)
992                 m->flags |= FLAG_IGNORE;
993
994         ptr = get_Load_ptr(load);
995
996         m->value.address = ptr;
997
998         for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
999                 ir_node *proj = get_irn_out(load, i);
1000                 long    pn;
1001
1002                 /* beware of keep edges */
1003                 if (is_End(proj))
1004                         continue;
1005
1006                 pn = get_Proj_proj(proj);
1007                 m->projs[pn] = proj;
1008                 switch (pn) {
1009                 case pn_Load_res:
1010                         m->value.value = proj;
1011                         m->value.mode  = get_irn_mode(proj);
1012                         break;
1013                 case pn_Load_X_except:
1014                         m->flags |= FLAG_EXCEPTION;
1015                         break;
1016                 case pn_Load_M:
1017                         m->mem = proj;
1018                         break;
1019                 case pn_Load_X_regular:
1020                         break;
1021                 default:
1022                         panic("Unsupported Proj from Load %+F", proj);
1023                 }
1024         }
1025
1026         /* check if we can determine the entity that will be loaded */
1027         ent = find_constant_entity(ptr);
1028
1029         if (ent != NULL                                     &&
1030                 allocation_static == get_entity_allocation(ent) &&
1031                 visibility_external_allocated != get_entity_visibility(ent)) {
1032                 /* a static allocation that is not external: there should be NO exception
1033                  * when loading even if we cannot replace the load itself. */
1034                 ir_node *value = NULL;
1035
1036                 /* no exception: clear the m fields as they might be checked again later */
1037                 if (m->projs[pn_Load_X_except]) {
1038                         exchange(m->projs[pn_Load_X_except], new_Bad());
1039                         m->projs[pn_Load_X_except] = NULL;
1040                         m->flags &= ~FLAG_EXCEPTION;
1041                         env.changed = 1;
1042                 }
1043                 if (m->projs[pn_Load_X_regular]) {
1044                         exchange(m->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
1045                         m->projs[pn_Load_X_regular] = NULL;
1046                         env.changed = 1;
1047                 }
1048
1049                 if (variability_constant == get_entity_variability(ent)) {
1050                         if (is_atomic_entity(ent)) {
1051                                 /* Might not be atomic after lowering of Sels.  In this case we
1052                                  * could also load, but it's more complicated. */
1053                                 /* simpler case: we load the content of a constant value:
1054                                  * replace it by the constant itself */
1055                                 value = get_atomic_ent_value(ent);
1056                         } else if (ent->has_initializer) {
1057                                 /* new style initializer */
1058                                 value = find_compound_ent_value(ptr);
1059                         } else {
1060                                 /* old style initializer */
1061                                 compound_graph_path *path = get_accessed_path(ptr);
1062
1063                                 if (path != NULL) {
1064                                         assert(is_proper_compound_graph_path(path, get_compound_graph_path_length(path)-1));
1065
1066                                         value = get_compound_ent_value_by_path(ent, path);
1067                                         DB((dbg, LEVEL_1, "  Constant access at %F%F resulted in %+F\n", ent, path, value));
1068                                         free_compound_graph_path(path);
1069                                 }
1070                         }
1071                         if (value != NULL)
1072                                 value = can_replace_load_by_const(load, value);
1073                 }
1074
1075                 if (value != NULL) {
1076                         /* we completely replace the load by this value */
1077                         DB((dbg, LEVEL_1, "Replacing Load %+F by constant %+F\n", m->node, value));
1078                         mark_replace_load(m, value);
1079                         return;
1080                 }
1081         }
1082
1083         if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
1084                 /* only create an address if this node is NOT killed immediately or ignored */
1085                 m->value.id = register_address(ptr);
1086                 ++env.n_mem_ops;
1087         } else {
1088                 /* no user, KILL it */
1089                 mark_replace_load(m, NULL);
1090         }
1091 }  /* update_Load_memop */
1092
1093 /**
1094  * Update a memop for a Store.
1095  *
1096  * @param m  the memop
1097  */
1098 static void update_Store_memop(memop_t *m) {
1099         int     i;
1100         ir_node *store = m->node;
1101         ir_node *adr   = get_Store_ptr(store);
1102
1103         if (get_Store_volatility(store) == volatility_is_volatile) {
1104                 m->flags |= FLAG_IGNORE;
1105         } else {
1106                 /* only create an address if this node is NOT ignored */
1107                 m->value.id = register_address(adr);
1108                 ++env.n_mem_ops;
1109         }
1110
1111         m->value.address = adr;
1112
1113         for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1114                 ir_node *proj = get_irn_out(store, i);
1115                 long    pn;
1116
1117                 /* beware of keep edges */
1118                 if (is_End(proj))
1119                         continue;
1120
1121                 pn = get_Proj_proj(proj);
1122                 m->projs[pn] = proj;
1123                 switch (pn) {
1124                 case pn_Store_X_except:
1125                         m->flags |= FLAG_EXCEPTION;
1126                         break;
1127                 case pn_Store_M:
1128                         m->mem = proj;
1129                         break;
1130                 case pn_Store_X_regular:
1131                         break;
1132                 default:
1133                         panic("Unsupported Proj from Store %+F", proj);
1134                 }
1135         }
1136         m->value.value = get_Store_value(store);
1137         m->value.mode  = get_irn_mode(m->value.value);
1138 }  /* update_Store_memop */
1139
1140 /**
1141  * Update a memop for a Call.
1142  *
1143  * @param m  the memop
1144  */
1145 static void update_Call_memop(memop_t *m) {
1146         ir_node  *call = m->node;
1147         unsigned prop  = get_Call_memory_properties(call);
1148         int      i;
1149
1150         if (prop & mtp_property_const) {
1151                 /* A constant call does NOT use memory at all, so we
1152                    can kick it from the list. */
1153         } else if (prop & mtp_property_pure) {
1154                 /* pure calls READ memory */
1155                 m->flags = 0;
1156         } else
1157                 m->flags = FLAG_KILL_ALL;
1158
1159         for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
1160                 ir_node *proj = get_irn_out(call, i);
1161
1162                 /* beware of keep edges */
1163                 if (is_End(proj))
1164                         continue;
1165
1166                 switch (get_Proj_proj(proj)) {
1167                 case pn_Call_X_except:
1168                         m->flags |= FLAG_EXCEPTION;
1169                         break;
1170                 case pn_Call_M_regular:
1171                         m->mem = proj;
1172                         break;
1173                 }
1174         }
1175 }  /* update_Call_memop */
1176
1177 /**
1178  * Update a memop for a Div/Mod/Quot/DivMod.
1179  *
1180  * @param m  the memop
1181  */
1182 static void update_DivOp_memop(memop_t *m) {
1183         ir_node *div = m->node;
1184         int     i;
1185
1186         for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
1187                 ir_node *proj = get_irn_out(div, i);
1188
1189                 /* beware of keep edges */
1190                 if (is_End(proj))
1191                         continue;
1192
1193                 switch (get_Proj_proj(proj)) {
1194                 case pn_Generic_X_except:
1195                         m->flags |= FLAG_EXCEPTION;
1196                         break;
1197                 case pn_Generic_M_regular:
1198                         m->mem = proj;
1199                         break;
1200                 }
1201         }
1202 }  /* update_DivOp_memop */
1203
1204 /**
1205  * Update a memop for a Phi.
1206  *
1207  * @param m  the memop
1208  */
1209 static void update_Phi_memop(memop_t *m) {
1210         /* the Phi is its own mem */
1211         m->mem = m->node;
1212 }  /* update_Phi_memop */
1213
1214 /**
1215  * Memory walker: collect all memory ops and build topological lists.
1216  */
1217 static void collect_memops(ir_node *irn, void *ctx) {
1218         memop_t  *op;
1219         ir_node  *block;
1220         block_t  *entry;
1221
1222         (void) ctx;
1223         if (is_Proj(irn)) {
1224                 /* we can safely ignore ProjM's except the initial memory */
1225                 if (irn != get_irg_initial_mem(current_ir_graph))
1226                         return;
1227         }
1228
1229         op    = alloc_memop(irn);
1230         block = get_nodes_block(irn);
1231         entry = get_block_entry(block);
1232
1233         if (is_Phi(irn)) {
1234                 update_Phi_memop(op);
1235                 /* Phis must always be placed first */
1236                 op->next = entry->memop_forward;
1237                 entry->memop_forward = op;
1238                 if (entry->memop_backward == NULL)
1239                         entry->memop_backward = op;
1240         } else {
1241                 switch (get_irn_opcode(irn)) {
1242                 case iro_Load:
1243                         update_Load_memop(op);
1244                         break;
1245                 case iro_Store:
1246                         update_Store_memop(op);
1247                         break;
1248                 case iro_Call:
1249                         update_Call_memop(op);
1250                         break;
1251                 case iro_Sync:
1252                 case iro_Pin:
1253                         op->mem = irn;
1254                         break;
1255                 case iro_Proj:
1256                         /* initial memory */
1257                         op->mem = irn;
1258                         break;
1259                 case iro_Return:
1260                 case iro_End:
1261                 /* we can use those to find the memory edge */
1262                         break;
1263                 case iro_Div:
1264                 case iro_DivMod:
1265                 case iro_Quot:
1266                 case iro_Mod:
1267                         update_DivOp_memop(op);
1268                         break;
1269
1270                 case iro_Builtin:
1271                         /* TODO: handle some builtins */
1272                 default:
1273                         /* unsupported operation */
1274                         op->flags = FLAG_KILL_ALL;
1275                 }
1276
1277
1278                 /* all other should be placed last */
1279                 if (entry->memop_backward == NULL) {
1280                         entry->memop_forward = entry->memop_backward = op;
1281                 } else {
1282                         entry->memop_backward->next = op;
1283                         entry->memop_backward       = op;
1284                 }
1285         }
1286 }  /* collect_memops */
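/*
 * After this walker has run, every block_t carries its memory ops as a
 * singly linked list: memory Phis first, all other memops appended in the
 * order the memory walk reaches them, with memop_backward pointing at the
 * last entry (the prev links are filled in by collect_backward()).
 */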
1287
1288 /**
1289  * Find an address in the current set.
1290  *
1291  * @param value  the value to be searched for
1292  *
1293  * @return a memop for the value or NULL if the value does
1294  *         not exist in the set or cannot be converted into
1295  *         the requested mode
1296  */
1297 static memop_t *find_address(const value_t *value) {
1298         if (rbitset_is_set(env.curr_set, value->id)) {
1299                 memop_t *res = env.curr_id_2_memop[value->id];
1300
1301                 if (res->value.mode == value->mode)
1302                         return res;
1303                 /* allow hidden casts */
1304                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1305                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1306                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1307                         return res;
1308         }
1309         return NULL;
1310 }  /* find_address */
1311
1312 /**
1313  * Find an address in the avail_out set.
1314  *
1315  * @param bl     the block
1316  */
1317 static memop_t *find_address_avail(const block_t *bl, unsigned id, const ir_mode *mode) {
1318         if (rbitset_is_set(bl->avail_out, id)) {
1319                 memop_t *res = bl->id_2_memop_avail[id];
1320
1321                 if (res->value.mode == mode)
1322                         return res;
1323                 /* allow hidden casts */
1324                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1325                     get_mode_arithmetic(mode) == irma_twos_complement &&
1326                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(mode))
1327                         return res;
1328         }
1329         return NULL;
1330 }  /* find_address_avail */
1331
1332 /**
1333  * Kill all addresses from the current set.
1334  */
1335 static void kill_all(void) {
1336         rbitset_clear_all(env.curr_set, env.rbs_size);
1337
1338         /* set sentinel */
1339         rbitset_set(env.curr_set, env.rbs_size - 1);
1340 }  /* kill_all */
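/*
 * The last bit (env.rbs_size - 1) apparently serves only as a sentinel:
 * it guarantees that rbitset_next() always finds a set bit, so the scan
 * loops that run while pos < env.rbs_size - 1 (see dump_curr() and
 * kill_memops()) terminate without special casing an empty set.
 */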
1341
1342 /**
1343  * Kill all memops in the current set that may alias the given Store value.
1344  *
1345  * @param value  the Store value
1346  */
1347 static void kill_memops(const value_t *value) {
1348         unsigned end = env.rbs_size - 1;
1349         unsigned pos;
1350
1351         for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1352                 memop_t *op = env.curr_id_2_memop[pos];
1353
1354                 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1355                                                           op->value.address, op->value.mode)) {
1356                         rbitset_clear(env.curr_set, pos);
1357                         env.curr_id_2_memop[pos] = NULL;
1358                         DB((dbg, LEVEL_2, "KILLING %+F because of possible alias address %+F\n", op->node, value->address));
1359                 }
1360         }
1361 }  /* kill_memops */
1362
1363 /**
1364  * Add the value of a memop to the current set.
1365  *
1366  * @param op  the memory op
1367  */
1368 static void add_memop(memop_t *op) {
1369         rbitset_set(env.curr_set, op->value.id);
1370         env.curr_id_2_memop[op->value.id] = op;
1371 }  /* add_memop */
1372
1373 /**
1374  * Add the value of a memop to the avail_out set.
1375  *
1376  * @param bl  the block
1377  * @param op  the memory op
1378  */
1379 static void add_memop_avail(block_t *bl, memop_t *op) {
1380         rbitset_set(bl->avail_out, op->value.id);
1381         bl->id_2_memop_avail[op->value.id] = op;
1382 }  /* add_memop_avail */
1383
1384 /**
1385  * Check if we can convert a value of one mode to another mode
1386  * without changing the representation of bits.
1387  *
1388  * @param from  the original mode
1389  * @param to    the destination mode
1390  */
1391 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
1392         if (get_mode_arithmetic(from) == irma_twos_complement &&
1393             get_mode_arithmetic(to) == irma_twos_complement &&
1394             get_mode_size_bits(from) == get_mode_size_bits(to))
1395                 return 1;
1396         return 0;
1397 }  /* can_convert_to */
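/*
 * Example (assuming the usual libFirm integer modes): mode_Is and mode_Iu
 * are both 32-bit two's complement modes, so can_convert_to(mode_Is,
 * mode_Iu) yields 1, whereas an integer/float pair of equal size does not
 * qualify because the float mode is not two's complement arithmetic.
 */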
1398
1399 /**
1400  * Add a Conv to the requested mode if needed.
1401  *
1402  * @param irn   the IR-node to convert
1403  * @param mode  the destination mode
1404  *
1405  * @return the possible converted node or NULL
1406  *         if the conversion is not possible
1407  */
1408 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
1409         ir_mode *other = get_irn_mode(irn);
1410         if (other != mode) {
1411                 /* different modes: check if conversion is possible without changing the bits */
1412                 if (can_convert_to(other, mode)) {
1413                         ir_node *block = get_nodes_block(irn);
1414                         return new_r_Conv(block, irn, mode);
1415                 }
1416                 /* otherwise not possible ... yet */
1417                 return NULL;
1418         }
1419         return irn;
1420 }  /* conv_to */
1421
1422 /**
1423  * Update the address of a value if this address was a Load result
1424  * and that Load has now been killed.
1425  *
1426  * @param value  the value whose address is updated
1427  */
1428 static void update_address(value_t *value) {
1429         if (is_Proj(value->address)) {
1430                 ir_node *load = get_Proj_pred(value->address);
1431
1432                 if (is_Load(load)) {
1433                         const memop_t *op = get_irn_memop(load);
1434
1435                         if (op->flags & FLAG_KILLED_NODE)
1436                                 value->address = op->replace;
1437                 }
1438         }
1439 }  /* update_address */
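
/*
 * Illustrative example for update_address() (a sketch): if a value's address
 * is the result Proj of a Load that has just been killed,
 *
 *     adr = Proj_res(Load1),  Load1 replaced by def
 *
 * the address is rewritten to def, so subsequent find_address() lookups
 * compare against the surviving node instead of the dead Proj.
 */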
1440
1441 /**
1442  * Do forward dataflow analysis on the given block: apply its GEN and KILL
1443  * effects to the current (avail) set.
1444  *
1445  * @param bl  the block
1446  */
1447 static void calc_gen_kill_avail(block_t *bl) {
1448         memop_t *op;
1449         ir_node *def;
1450
1451         for (op = bl->memop_forward; op != NULL; op = op->next) {
1452                 switch (get_irn_opcode(op->node)) {
1453                 case iro_Phi:
1454                         /* meet */
1455                         break;
1456                 case iro_Sync:
1457                         /* join */
1458                         break;
1459                 case iro_Load:
1460                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1461                                 /* do we have this already? */
1462                                 memop_t *other;
1463
1464                                 update_address(&op->value);
1465                                 other = find_address(&op->value);
1466                                 if (other != NULL && other != op) {
1467                                         def = conv_to(other->value.value, op->value.mode);
1468                                         if (def != NULL) {
1469 #ifdef DEBUG_libfirm
1470                                                 if (is_Store(other->node)) {
1471                                                         /* RAW */
1472                                                         DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
1473                                                 } else {
1474                                                         /* RAR */
1475                                                         DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
1476                                                 }
1477 #endif
1478                                                 mark_replace_load(op, def);
1479                                                 /* do NOT change the memop table */
1480                                                 continue;
1481                                         }
1482                                 }
1483                                 /* add this value */
1484                                 add_memop(op);
1485                         }
1486                         break;
1487                 case iro_Store:
1488                         if (! (op->flags & FLAG_KILLED_NODE)) {
1489                                 /* do we have this store already */
1490                                 memop_t *other;
1491
1492                                 update_address(&op->value);
1493                                 other = find_address(&op->value);
1494                                 if (other != NULL) {
1495                                         if (is_Store(other->node)) {
1496                                                 if (op != other && !(other->flags & FLAG_IGNORE) &&
1497                                                     get_nodes_block(other->node) == get_nodes_block(op->node)) {
1498                                                         /*
1499                                                          * For a WAW in the same block we can kill the first Store.
1500                                                          * This is a shortcut: we know that the second Store will be anticipated
1501                                                          * then in any case.
1502                                                          */
1503                                                         DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", other->node, op->node));
1504                                                         mark_remove_store(other);
1505                                                         /* FIXME: a Load might get freed due to this killed Store */
1506                                                 }
1507                                         } else if (other->value.value == op->value.value && !(op->flags & FLAG_IGNORE)) {
1508                                                 /* WAR */
1509                                                 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1510                                                 mark_remove_store(op);
1511                                                 /* do NOT change the memop table */
1512                                                 continue;
1513                                         }
1514                                 }
1515                                 /* KILL all possible aliases */
1516                                 kill_memops(&op->value);
1517                                 /* add this value */
1518                                 add_memop(op);
1519                         }
1520                         break;
1521                 default:
1522                         if (op->flags & FLAG_KILL_ALL)
1523                                 kill_all();
1524                 }
1525         }
1526 }  /* calc_gen_kill_avail */
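
/*
 * Illustrative source-level examples for the cases handled above (sketches
 * only, not part of the pass):
 *
 *     *p = x;  y = *p;     // RAW: the Load is replaced by x
 *      y = *p; z = *p;     // RAR: the second Load is replaced by the first
 *     *p = x;  *p = y;     // WAW in one block: the first Store is removed
 *      x = *p;  *p = x;    // WAR: storing back the loaded value, the Store is removed
 *
 * For every surviving Store, all possibly aliasing entries are killed from
 * the current set before its own value is added.
 */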
1527
1528 #define BYTE_SIZE(x)  (((x) + 7) >> 3)
1529
1530 /**
1531  * Do forward dataflow analysis on a given block to calculate the avail_out set
1532  * for this block only.
1533  *
1534  * @param bl  the block
1535  */
1536 static void forward_avail(block_t *bl) {
1537         /* fill the data from the current block */
1538         env.curr_id_2_memop = bl->id_2_memop_avail;
1539         env.curr_set        = bl->avail_out;
1540
1541         calc_gen_kill_avail(bl);
1542         dump_curr(bl, "Avail_out");
1543 }  /* forward_avail */
1544
1545 /**
1546  * Do backward dataflow analysis on a given block to calculate the antic set
1547  * of Loaded addresses.
1548  *
1549  * @param bl  the block
1550  *
1551  * @return non-zero if the set has changed since the last iteration
1552  */
1553 static int backward_antic(block_t *bl) {
1554         memop_t *op;
1555         ir_node *block = bl->block;
1556         int     n = get_Block_n_cfg_outs(block);
1557
1558         if (n == 1) {
1559                 ir_node  *succ    = get_Block_cfg_out(block, 0);
1560                 block_t  *succ_bl = get_block_entry(succ);
1561                 int      pred_pos = get_Block_cfgpred_pos(succ, block);
1562                 unsigned end      = env.rbs_size - 1;
1563                 unsigned pos;
1564
1565                 kill_all();
1566
1567                 if (bl->trans_results == NULL) {
1568                         /* allocate the translate cache */
1569                         unsigned size = env.curr_adr_id * sizeof(bl->trans_results[0]);
1570                         bl->trans_results = obstack_alloc(&env.obst, size);
1571                         memset(bl->trans_results, 0, size);
1572                 }
1573
1574                 /* check for partly redundant values */
1575                 for (pos = rbitset_next(succ_bl->anticL_in, 0, 1);
1576                      pos < end;
1577                      pos = rbitset_next(succ_bl->anticL_in, pos + 1, 1)) {
1578                         /*
1579                          * do Phi-translation here: Note that at this point the nodes are
1580                          * not changed, so we can safely cache the results.
1581                          * However: Loads of Load results ARE bad, because we have no way
1582                          * to translate them yet ...
1583                          */
1584                         memop_t *op = bl->trans_results[pos];
1585                         if (op == NULL) {
1586                                 /* not yet translated */
1587                                 ir_node *adr, *trans_adr;
1588
1589                                 op  = succ_bl->id_2_memop_antic[pos];
1590                                 adr = op->value.address;
1591
1592                                 trans_adr = phi_translate(adr, succ, pred_pos);
1593                                 if (trans_adr != adr) {
1594                                         /* create a new entry for the translated one */
1595                                         memop_t *new_op;
1596
1597                                         new_op = alloc_memop(NULL);
1598                                         new_op->value.address = trans_adr;
1599                                         new_op->value.id      = register_address(trans_adr);
1600                                         new_op->value.mode    = op->value.mode;
1601                                         new_op->node          = op->node; /* we need the node to decide if Load/Store */
1602                                         new_op->flags         = op->flags;
1603
1604                                         bl->trans_results[pos] = new_op;
1605                                         op = new_op;
1606                                 }
1607                         }
1608                         env.curr_id_2_memop[op->value.id] = op;
1609                         rbitset_set(env.curr_set, op->value.id);
1610                 }
1611         } else if (n > 1) {
1612                 ir_node *succ    = get_Block_cfg_out(block, 0);
1613                 block_t *succ_bl = get_block_entry(succ);
1614                 int i;
1615
1616                 rbitset_copy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1617                 memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1618
1619                 /* Hmm: probably we want to kill merges of Loads and Stores here */
1620                 for (i = n - 1; i > 0; --i) {
1621                         ir_node *succ    = get_Block_cfg_out(bl->block, i);
1622                         block_t *succ_bl = get_block_entry(succ);
1623
1624                         rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1625                 }
1626         } else {
1627                 /* block ends with a noreturn call */
1628                 kill_all();
1629         }
1630
1631         dump_curr(bl, "AnticL_out");
1632
1633         for (op = bl->memop_backward; op != NULL; op = op->prev) {
1634                 switch (get_irn_opcode(op->node)) {
1635                 case iro_Phi:
1636                         /* meet */
1637                         break;
1638                 case iro_Sync:
1639                         /* join */
1640                         break;
1641                 case iro_Load:
1642                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1643                                 /* always add it */
1644                                 add_memop(op);
1645                         }
1646                         break;
1647                 case iro_Store:
1648                         if (! (op->flags & FLAG_KILLED_NODE)) {
1649                                 /* a Store: check which memops must be killed */
1650                                 kill_memops(&op->value);
1651                         }
1652                         break;
1653                 default:
1654                         if (op->flags & FLAG_KILL_ALL)
1655                                 kill_all();
1656                 }
1657         }
1658
1659         memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1660         if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1661                 /* changed */
1662                 rbitset_copy(bl->anticL_in, env.curr_set, env.rbs_size);
1663                 dump_curr(bl, "AnticL_in*");
1664                 return 1;
1665         }
1666         dump_curr(bl, "AnticL_in");
1667         return 0;
1668 }  /* backward_antic */
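
/*
 * Illustrative example of the Phi-translation in backward_antic() (a sketch):
 * if the single successor block starts with
 *
 *     adr = Phi(adr0, adr1)
 *
 * and a Load from *adr is anticipated there, then propagating this fact into
 * predecessor 0 translates the address to adr0; a fresh memop entry is
 * created for the translated address and cached in bl->trans_results.
 */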
1669
1670 /**
1671  * Replace a Load memop by an already known value.
1672  *
1673  * @param op  the Load memop
1674  */
1675 static void replace_load(memop_t *op) {
1676         ir_node *load = op->node;
1677         ir_node *def  = skip_Id(op->replace);
1678         ir_node *proj;
1679         ir_mode *mode;
1680
1681         if (def != NULL)
1682                 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1683         else {
1684                 if (op->flags & FLAG_EXCEPTION) {
1685                         /* bad: this node is unused and executed for exception only */
1686                         DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1687                         return;
1688                 }
1689                 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1690         }
1691
1692         if (op->mem != NULL) {
1693                 /* in rare cases a Load might have NO memory */
1694                 exchange(op->mem, get_Load_mem(load));
1695         }
1696         proj = op->projs[pn_Load_res];
1697         if (proj != NULL) {
1698                 mode = get_irn_mode(proj);
1699                 if (get_irn_mode(def) != mode) {
1700                         /* a hidden cast */
1701                         dbg_info *db    = get_irn_dbg_info(load);
1702                         ir_node  *block = get_nodes_block(proj);
1703                         def = new_rd_Conv(db, block, def, mode);
1704                 }
1705                 exchange(proj, def);
1706         }
1707         proj = op->projs[pn_Load_X_except];
1708         if (proj != NULL) {
1709                 exchange(proj, new_Bad());
1710         }
1711         proj = op->projs[pn_Load_X_regular];
1712         if (proj != NULL) {
1713                 exchange(proj, new_r_Jmp(get_nodes_block(load)));
1714         }
1715 }  /* replace_load */
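
/*
 * Illustrative effect of replace_load() (a sketch): for a killed Load with
 * replacement def,
 *
 *     Proj_M(Load)         is exchanged with the Load's memory input,
 *     Proj_res(Load)       is exchanged with def (through a Conv if the modes differ),
 *     Proj_X_except(Load)  is exchanged with Bad,
 *     Proj_X_regular(Load) is exchanged with a Jmp in the Load's block,
 *
 * so the Load itself becomes dead and can be removed later.
 */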
1716
1717 /**
1718  * Remove a Store memop.
1719  *
1720  * @param op  the Store memop
1721  */
1722 static void remove_store(memop_t *op) {
1723         ir_node *store = op->node;
1724         ir_node *proj;
1725
1726         DB((dbg, LEVEL_1, "Removing %+F\n", store));
1727
1728         if (op->mem != NULL) {
1729                 /* in rare cases a Store might have no memory */
1730                 exchange(op->mem, get_Store_mem(store));
1731         }
1732         proj = op->projs[pn_Store_X_except];
1733         if (proj != NULL) {
1734                 exchange(proj, new_Bad());
1735         }
1736         proj = op->projs[pn_Store_X_regular];
1737         if (proj != NULL) {
1738                 exchange(proj, new_r_Jmp(get_nodes_block(store)));
1739         }
1740 }  /* remove_store */
1741
1742
1743 /**
1744  * Do all necessary replacements for a given block.
1745  *
1746  * @param bl  the block
1747  */
1748 static void do_replacements(block_t *bl) {
1749         memop_t *op;
1750
1751         for (op = bl->memop_forward; op != NULL; op = op->next) {
1752                 if (op->flags & FLAG_KILLED_NODE) {
1753                         switch (get_irn_opcode(op->node)) {
1754                         case iro_Load:
1755                                 replace_load(op);
1756                                 break;
1757                         case iro_Store:
1758                                 remove_store(op);
1759                                 break;
1760                         }
1761                 }
1762         }
1763 }  /* do_replacements */
1764
1765 /**
1766  * Calculate the Avail_out sets for all basic blocks.
1767  */
1768 static void calcAvail(void) {
1769         memop_t  **tmp_memop = env.curr_id_2_memop;
1770         unsigned *tmp_set    = env.curr_set;
1771         block_t  *bl;
1772
1773         /* calculate avail_out */
1774         DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1775
1776         /* iterate over all blocks in any order, skip the start block */
1777         for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1778                 forward_avail(bl);
1779         }
1780
1781         /* restore the current sets */
1782         env.curr_id_2_memop = tmp_memop;
1783         env.curr_set        = tmp_set;
1784 }  /* calcAvail */
1785
1786 /**
1787  * Calculate the Antic_in sets for all basic blocks.
1788  */
1789 static void calcAntic(void) {
1790         int i, need_iter;
1791
1792         /* calculate antic_in */
1793         DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1794         i = 0;
1795         do {
1796                 block_t *bl;
1797
1798                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1799
1800                 need_iter = 0;
1801
1802                 /* over all blocks in reverse post order */
1803                 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1804                         need_iter |= backward_antic(bl);
1805                 }
1806                 ++i;
1807         } while (need_iter);
1808         DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1809 }  /* calcAntic */
1810
1811 /**
1812  * Return the node representing the last memory in a block.
1813  *
1814  * @param bl  the block
1815  */
1816 static ir_node *find_last_memory(block_t *bl) {
1817         for (;;) {
1818                 if (bl->memop_backward != NULL) {
1819                         return bl->memop_backward->mem;
1820                 }
1821                 /* if there is NO memory in this block, go to the dominator */
1822                 bl = get_block_entry(get_Block_idom(bl->block));
1823         }
1824 }  /* find_last_memory */
1825
1826 /**
1827  * Reroute all memory users of old memory
1828  * to a new memory IR-node.
1829  *
1830  * @param omem  the old memory IR-node
1831  * @param nmem  the new memory IR-node
1832  */
1833 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1834         int i;
1835
1836         for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1837                 int     n_pos;
1838                 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1839
1840                 set_irn_n(user, n_pos, nmem);
1841         }
1842
1843         /* all edges that previously pointed to omem now point to nmem */
1844         nmem->out = omem->out;
1845 }  /* reroute_all_mem_users */
1846
1847 /**
1848  * Reroute memory users of old memory that are dominated by a given block
1849  * to a new memory IR-node.
1850  *
1851  * @param omem     the old memory IR-node
1852  * @param nmem     the new memory IR-node
1853  * @param pass_bl  the block the memory must pass
1854  */
1855 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1856         int             i, j, n = get_irn_n_outs(omem);
1857         ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1858
1859         for (i = j = 0; i < n; ++i) {
1860                 int     n_pos;
1861                 ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
1862                 ir_node *use_bl = get_nodes_block(user);
1863
1864
1865                 if (is_Phi(user)) {
1866                         use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1867                 }
1868                 if (block_dominates(pass_bl, use_bl)) {
1869                         /* found a user that is dominated */
1870                         ++j;
1871                         edges[j].pos = n_pos;
1872                         edges[j].use = user;
1873
1874                         set_irn_n(user, n_pos, nmem);
1875                 }
1876         }
1877
1878         /* Modify the out structure: we create a new out edge array on our
1879            temporary obstack here. This should be no problem, as we invalidate the edges
1880            at the end anyway. */
1881         /* first entry is used for the length */
1882         edges[0].pos = j;
1883         nmem->out = edges;
1884 }  /* reroute_mem_through */
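
/*
 * Note on reroute_mem_through() (descriptive only): users of omem whose
 * (Phi-adjusted) block is dominated by pass_bl get their memory input
 * switched to nmem.  The new def-use edge array for nmem lives on the
 * temporary obstack and stores the edge count in edges[0].pos, as the
 * comment above describes; it stays valid because the out edges are
 * invalidated at the end of the pass anyway.
 */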
1885
1886 /**
1887  * Insert Loads, making partly redundant Loads fully redundant.
1888  */
1889 static int insert_Load(block_t *bl) {
1890         ir_node  *block = bl->block;
1891         int      i, n = get_Block_n_cfgpreds(block);
1892         unsigned end = env.rbs_size - 1;
1893         unsigned pos;
1894
1895         DB((dbg, LEVEL_3, "processing %+F\n", block));
1896
1897         if (n == 0) {
1898                 /* might still happen for an unreachable block (end for instance) */
1899                 return 0;
1900         }
1901
1902         if (n > 1) {
1903                 ir_node **ins;
1904                 int     pos;
1905
1906                 NEW_ARR_A(ir_node *, ins, n);
1907
1908                 rbitset_set_all(env.curr_set, env.rbs_size);
1909
1910                 /* More than one predecessor: calculate the intersection of all avail_out sets,
1911                    ignoring unevaluated blocks. These would contribute Top anyway. */
1912                 for (i = n - 1; i >= 0; --i) {
1913                         ir_node *pred = skip_Proj(get_Block_cfgpred(block, i));
1914                         ir_node *blk  = get_nodes_block(pred);
1915                         block_t *pred_bl;
1916
1917                         pred_bl = get_block_entry(blk);
1918                         rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
1919
1920                         if (is_Load(pred) || is_Store(pred)) {
1921                                 /* We reached this block by an exception from a Load or Store:
1922                                  * the memop creating the exception did NOT complete then, so kill it
1923                                  */
1924                                 memop_t *exc_op = get_irn_memop(pred);
1925                                 rbitset_clear(env.curr_set, exc_op->value.id);
1926                         }
1927
1928                 }
1929                 /*
1930                  * Ensure that all values are in the map: build Phi's if necessary:
1931                  * Note: the last bit is the sentinel and ALWAYS set, so start at env.rbs_size - 2.
1932                  */
1933                 for (pos = env.rbs_size - 2; pos >= 0; --pos) {
1934                         if (! rbitset_is_set(env.curr_set, pos))
1935                                 env.curr_id_2_memop[pos] = NULL;
1936                         else {
1937                                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1938                                 block_t *pred_bl = get_block_entry(pred);
1939                                 int     need_phi = 0;
1940                                 memop_t *first   = NULL;
1941                                 ir_mode *mode    = NULL;
1942
1943                                 for (i = 0; i < n; ++i) {
1944                                         memop_t *mop;
1945
1946                                         pred    = get_Block_cfgpred_block(bl->block, i);
1947                                         pred_bl = get_block_entry(pred);
1948
1949                                         mop = pred_bl->id_2_memop_avail[pos];
1950                                         if (first == NULL) {
1951                                                 first = mop;
1952                                                 ins[0] = first->value.value;
1953                                                 mode = get_irn_mode(ins[0]);
1954
1955                                                 /* no Phi needed so far */
1956                                                 env.curr_id_2_memop[pos] = first;
1957                                         } else {
1958                                                 ins[i] = conv_to(mop->value.value, mode);
1959                                                 if (ins[i] != ins[0]) {
1960                                                         if (ins[i] == NULL) {
1961                                                                 /* conversion failed */
1962                                                                 env.curr_id_2_memop[pos] = NULL;
1963                                                                 rbitset_clear(env.curr_set, pos);
1964                                                                 break;
1965                                                         }
1966                                                         need_phi = 1;
1967                                                 }
1968                                         }
1969                                 }
1970                                 if (need_phi) {
1971                                         /* build a Phi  */
1972                                         ir_node *phi = new_r_Phi(bl->block, n, ins, mode);
1973                                         memop_t *phiop = alloc_memop(phi);
1974
1975                                         phiop->value = first->value;
1976                                         phiop->value.value = phi;
1977
1978                                         /* no need to link it in, as it is a DATA phi */
1979
1980                                         env.curr_id_2_memop[pos] = phiop;
1981
1982                                         DB((dbg, LEVEL_3, "Created new %+F on merging value for address %+F\n", phi, first->value.address));
1983                                 }
1984                         }
1985                 }
1986         } else {
1987                 /* only one predecessor, simply copy the map */
1988                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1989                 block_t *pred_bl = get_block_entry(pred);
1990
1991                 rbitset_copy(env.curr_set, pred_bl->avail_out, env.rbs_size);
1992
1993                 memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
1994         }
1995
1996         if (n > 1) {
1997                 /* check for partly redundant values */
1998                 for (pos = rbitset_next(bl->anticL_in, 0, 1);
1999                      pos < end;
2000                      pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
2001                         memop_t *op = bl->id_2_memop_antic[pos];
2002                         int     have_some, all_same;
2003                         ir_node *first;
2004
2005                         if (rbitset_is_set(env.curr_set, pos)) {
2006                                 /* already avail */
2007                                 continue;
2008                         }
2009
2010                         assert(is_Load(op->node));
2011
2012                         DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
2013
2014                         have_some  = 0;
2015                         all_same   = 1;
2016                         first      = NULL;
2017                         for (i = n - 1; i >= 0; --i) {
2018                                 ir_node *pred    = get_Block_cfgpred_block(block, i);
2019                                 block_t *pred_bl = get_block_entry(pred);
2020                                 ir_mode *mode    = op->value.mode;
2021                                 memop_t *e;
2022                                 ir_node *adr;
2023
2024                                 adr = phi_translate(op->value.address, block, i);
2025                                 DB((dbg, LEVEL_3, ".. using address %+F in pred %d\n", adr, i));
2026                                 e   = find_address_avail(pred_bl, register_address(adr), mode);
2027                                 if (e == NULL) {
2028                                         ir_node *ef_block = get_nodes_block(adr);
2029                                         if (! block_dominates(ef_block, pred)) {
2030                                                 /* cannot place a copy here */
2031                                                 have_some = 0;
2032                                                 DB((dbg, LEVEL_3, "%+F cannot be moved into predecessor %+F\n", op->node, pred));
2033                                                 break;
2034                                         }
2035                                         DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
2036                                         pred_bl->avail = NULL;
2037                                         all_same       = 0;
2038                                 } else {
2039                                         if (e->value.mode != mode && !can_convert_to(e->value.mode, mode)) {
2040                                                 /* cannot create a Phi due to different modes */
2041                                                 have_some = 0;
2042                                                 break;
2043                                         }
2044                                         pred_bl->avail = e;
2045                                         have_some      = 1;
2046                                         DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
2047                                         if (first == NULL)
2048                                                 first = e->node;
2049                                         else if (first != e->node)
2050                                                 all_same = 0;
2051                                 }
2052                         }
2053                         if (have_some && !all_same) {
2054                                 ir_mode *mode = op->value.mode;
2055                                 ir_node **in, *phi;
2056                                 memop_t *phi_op;
2057
2058                                 NEW_ARR_A(ir_node *, in, n);
2059
2060                                 for (i = n - 1; i >= 0; --i) {
2061                                         ir_node *pred    = get_Block_cfgpred_block(block, i);
2062                                         block_t *pred_bl = get_block_entry(pred);
2063
2064                                         if (pred_bl->avail == NULL) {
2065                                                 /* create a new Load here to make the original Load fully redundant */
2066                                                 dbg_info *db       = get_irn_dbg_info(op->node);
2067                                                 ir_node  *last_mem = find_last_memory(pred_bl);
2068                                                 ir_node  *load, *def, *adr;
2069                                                 memop_t  *new_op;
2070
2071                                                 assert(last_mem != NULL);
2072                                                 adr  = phi_translate(op->value.address, block, i);
2073                                                 load = new_rd_Load(db, pred, last_mem, adr, mode, cons_none);
2074                                                 def  = new_r_Proj(pred, load, mode, pn_Load_res);
2075                                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));
2076
2077                                                 new_op                = alloc_memop(load);
2078                                                 new_op->mem           = new_r_Proj(pred, load, mode_M, pn_Load_M);
2079                                                 new_op->value.address = adr;
2080                                                 new_op->value.id      = op->value.id;
2081                                                 new_op->value.mode    = mode;
2082                                                 new_op->value.value   = def;
2083
2084                                                 new_op->projs[pn_Load_M]   = new_op->mem;
2085                                                 new_op->projs[pn_Load_res] = def;
2086
2087                                                 new_op->prev = pred_bl->memop_backward;
2088                                                 if (pred_bl->memop_backward != NULL)
2089                                                         pred_bl->memop_backward->next = new_op;
2090
2091                                                 pred_bl->memop_backward = new_op;
2092
2093                                                 if (pred_bl->memop_forward == NULL)
2094                                                         pred_bl->memop_forward = new_op;
2095
2096                                                 if (get_nodes_block(last_mem) == pred) {
2097                                                         /* We have added a new last memory op in the pred block.
2098                                                            If pred already had a last mem, reroute all its memory
2099                                                            users. */
2100                                                         reroute_all_mem_users(last_mem, new_op->mem);
2101                                                 } else {
2102                                                         /* reroute only the memory users that go through the pred block */
2103                                                         reroute_mem_through(last_mem, new_op->mem, pred);
2104                                                 }
2105
2106                                                 /* we added this load at the end, so it will be avail anyway */
2107                                                 add_memop_avail(pred_bl, new_op);
2108                                                 pred_bl->avail = new_op;
2109                                         }
2110                                         in[i] = conv_to(pred_bl->avail->value.value, mode);
2111                                 }
2112                                 phi = new_r_Phi(block, n, in, mode);
2113                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for the now redundant %+F\n", phi, block, op->node));
2114
2115                                 phi_op = clone_memop_phi(op, phi);
2116                                 add_memop(phi_op);
2117                         }
2118                 }
2119         }
2120
2121         /* recalculate avail by gen and kill */
2122         calc_gen_kill_avail(bl);
2123
2124         /* always update the map after gen/kill, as values might have been changed due to RAR/WAR/WAW */
2125         memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2126
2127         if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
2128                 /* the avail set has changed */
2129                 rbitset_copy(bl->avail_out, env.curr_set, env.rbs_size);
2130                 dump_curr(bl, "Avail_out*");
2131                 return 1;
2132         }
2133         dump_curr(bl, "Avail_out");
2134         return 0;
2135 }  /* insert_Load */
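
/*
 * Illustrative partial-redundancy example for insert_Load() (a sketch, not
 * part of the pass):
 *
 *     if (c) { ... x = *p; ... }     // *p is available on this path only
 *     else   { ...            }
 *     y = *p;                        // partly redundant Load
 *
 * The Load from *p is anticipated at the join block but available from only
 * one predecessor.  A new Load is created in the other predecessor, a data
 * Phi merges the two values, and the original Load becomes fully redundant;
 * the following calc_gen_kill_avail() run over the block then marks it for
 * replacement.
 */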
2136
2137 /**
2138  * Insert Loads upwards.
2139  */
2140 static void insert_Loads_upwards(void) {
2141         int i, need_iter;
2142         block_t *bl;
2143
2144         /* recalculate avail_out and insert Loads */
2145         DB((dbg, LEVEL_2, "Inserting Loads\n"));
2146
2147         i = 0;
2148         do {
2149                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
2150
2151                 need_iter = 0;
2152
2153                 /* over all blocks in reverse post order, skip the start block */
2154                 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
2155                         need_iter |= insert_Load(bl);
2156                 }
2157                 ++i;
2158         } while (need_iter);
2159
2160         DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
2161 }  /* insert_Loads_upwards */
2162
2163 /**
2164  * Kill unreachable control flow.
2165  *
2166  * @param irg  the graph to operate on
2167  */
2168 static void kill_unreachable_blocks(ir_graph *irg) {
2169         block_t *bl;
2170         ir_node **ins;
2171         int     changed = 0;
2172
2173         NEW_ARR_A(ir_node *, ins, env.max_cfg_preds);
2174
2175         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2176                 ir_node *block = bl->block;
2177                 int     i, j, k, n;
2178
2179                 assert(get_Block_mark(block));
2180
2181                 n = get_Block_n_cfgpreds(block);
2182
2183                 for (i = j = 0; i < n; ++i) {
2184                         ir_node *pred = get_Block_cfgpred(block, i);
2185                         ir_node *pred_bl;
2186
2187                         if (is_Bad(pred))
2188                                 continue;
2189
2190                         pred_bl = get_nodes_block(skip_Proj(pred));
2191                         if (! get_Block_mark(pred_bl))
2192                                 continue;
2193
2194                         ins[j++] = pred;
2195                 }
2196                 if (j != n) {
2197                         ir_node *phi, *next;
2198
2199                         /* some dead block predecessors detected */
2200                         changed = 1;
2201
2202                         DB((dbg, LEVEL_1, "Killing dead block predecessors on %+F\n", block));
2203
2204                         set_irn_in(block, j, ins);
2205
2206                         /* shorten all Phi nodes */
2207                         for (phi = get_Block_phis(block); phi != NULL; phi = next) {
2208                                 next = get_Phi_next(phi);
2209
2210                                 for (i = k = 0; i < n; ++i) {
2211                                         ir_node *pred = get_Block_cfgpred_block(block, i);
2212
2213                                         if (is_Bad(pred))
2214                                                 continue;
2215
2216                                         if (! get_Block_mark(pred))
2217                                                 continue;
2218
2219                                         ins[k++] = get_Phi_pred(phi, i);
2220                                 }
2221                                 if (k == 1)
2222                                         exchange(phi, ins[0]);
2223                                 else
2224                                         set_irn_in(phi, k, ins);
2225                         }
2226                 }
2227
2228         }
2229
2230         if (changed) {
2231                 /* kick keep alives */
2232                 ir_node *end = get_irg_end(irg);
2233                 int     i, j, n = get_End_n_keepalives(end);
2234
2235                 NEW_ARR_A(ir_node *, ins, n);
2236
2237                 for (i = j = 0; i < n; ++i) {
2238                         ir_node *ka = get_End_keepalive(end, i);
2239                         ir_node *ka_bl;
2240
2241                         if (is_Bad(ka))
2242                                 continue;
2243                         if (is_Block(ka))
2244                                 ka_bl = ka;
2245                         else
2246                                 ka_bl = get_nodes_block(skip_Proj(ka));
2247                         if (get_Block_mark(ka_bl))
2248                                 ins[j++] = ka;
2249                 }
2250                 if (j != n)
2251                         set_End_keepalives(end, j, ins);
2252
2253                 free_irg_outs(irg);
2254
2255                 /* this transformation does NOT invalidate the dominance information */
2256         }
2257 }  /* kill_unreachable_blocks */
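
/*
 * Illustrative example for kill_unreachable_blocks() (a sketch): if block B
 * has control flow predecessors { P1, Pdead, P2 } and Pdead is unreachable,
 * B's in-array is shortened to { P1, P2 } and every Phi(a, b, c) in B is
 * shortened to Phi(a, c); if only one predecessor remains, the Phi is
 * exchanged with its single remaining operand.  Keep-alive edges to
 * unreachable nodes are removed from the End node as well.
 */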
2258
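/**
 * Load/Store optimization entry point for a single graph: removes redundant
 * Loads and dead Stores and inserts Loads to make partly redundant Loads
 * fully redundant.
 *
 * @param irg  the graph to optimize
 *
 * @return non-zero if the graph was changed
 */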
2259 int opt_ldst(ir_graph *irg) {
2260         block_t  *bl;
2261         ir_graph *rem = current_ir_graph;
2262
2263         current_ir_graph = irg;
2264
2265         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
2266 //      firm_dbg_set_mask(dbg, -1);
2267
2268         DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
2269
2270         /* we need landing pads */
2271         remove_critical_cf_edges(irg);
2272
2273 //      dump_ir_block_graph(irg, "-XXX");
2274
2275         if (get_opt_alias_analysis()) {
2276                 assure_irg_entity_usage_computed(irg);
2277                 assure_irp_globals_entity_usage_computed();
2278         }
2279
2280         obstack_init(&env.obst);
2281         ir_nodemap_init(&env.adr_map);
2282
2283         env.forward       = NULL;
2284         env.backward      = NULL;
2285         env.curr_adr_id   = 0;
2286         env.n_mem_ops     = 0;
2287         env.max_cfg_preds = 0;
2288         env.changed       = 0;
2289         env.start_bl      = get_irg_start_block(irg);
2290         env.end_bl        = get_irg_end_block(irg);
2291 #ifdef DEBUG_libfirm
2292         env.id_2_address  = NEW_ARR_F(ir_node *, 0);
2293 #endif
2294
2295         assure_irg_outs(irg);
2296
2297         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2298
2299         /* first step: allocate block entries. Note that some blocks might be
2300            unreachable here. Using the normal walk ensures that ALL blocks are initialized. */
2301         irg_walk_graph(irg, prepare_blocks, link_phis, NULL);
2302
2303         /* produce an inverse post-order list for the CFG: this links only reachable
2304            blocks */
2305         irg_out_block_walk(get_irg_start_block(irg), NULL, inverse_post_order, NULL);
2306
2307         if (! get_Block_mark(env.end_bl)) {
2308                 /*
2309                  * The end block is NOT reachable due to endless loops
2310                  * or no_return calls.
2311                  * Place the end block last.
2312                  * env.backward points to the last block in the list for this purpose.
2313                  */
2314                 env.backward->forward_next = get_block_entry(env.end_bl);
2315
2316                 set_Block_mark(env.end_bl, 1);
2317         }
2318
2319         /* KILL unreachable blocks: these disturb the data flow analysis */
2320         kill_unreachable_blocks(irg);
2321
2322         assure_doms(irg);
2323
2324         /* second step: find and sort all memory ops */
2325         walk_memory_irg(irg, collect_memops, NULL, NULL);
2326
2327 #ifdef DEBUG_libfirm
2328         /* check that the backward map is correct */
2329         assert((unsigned)ARR_LEN(env.id_2_address) == env.curr_adr_id);
2330 #endif
2331
2332         if (env.n_mem_ops == 0) {
2333                 /* no memory ops */
2334                 goto end;
2335         }
2336
2337         /* create the backward links. */
2338         env.backward = NULL;
2339         irg_block_walk_graph(irg, NULL, collect_backward, NULL);
2340
2341         /* link the end block in */
2342         bl = get_block_entry(env.end_bl);
2343         bl->backward_next = env.backward;
2344         env.backward      = bl;
2345
2346         /* check that we really start with the start / end block */
2347         assert(env.forward->block  == env.start_bl);
2348         assert(env.backward->block == env.end_bl);
2349
2350         /* create address sets: for now, only the existing addresses are allowed plus one
2351            needed for the sentinel */
2352         env.rbs_size = env.curr_adr_id + 1;
2353
2354         /* create the current set */
2355         env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2356         rbitset_set(env.curr_set, env.rbs_size - 1);
2357         env.curr_id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2358         memset(env.curr_id_2_memop, 0, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2359
2360         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2361                 /* set sentinel bits */
2362                 bl->avail_out  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2363                 rbitset_set(bl->avail_out, env.rbs_size - 1);
2364
2365                 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2366                 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
2367
2368                 bl->anticL_in  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2369                 rbitset_set(bl->anticL_in, env.rbs_size - 1);
2370
2371                 bl->id_2_memop_antic = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2372                 memset(bl->id_2_memop_antic, 0, env.rbs_size * sizeof(bl->id_2_memop_antic[0]));
2373         }
2374
2375 //      dump_block_list(&env);
2376         (void) dump_block_list;
2377
2378         calcAvail();
2379         calcAntic();
2380
2381         insert_Loads_upwards();
2382
2383         if (env.changed) {
2384                 /* over all blocks in reverse post order */
2385                 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2386                         do_replacements(bl);
2387                 }
2388
2389                 /* not only invalidate but free them: we might have allocated new out arrays
2390                    on our obstack, which will be freed together with it. */
2391                 free_irg_outs(irg);
2392                 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
2393         }
2394 end:
2395
2396         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2397         ir_nodemap_destroy(&env.adr_map);
2398         obstack_free(&env.obst, NULL);
2399
2400 //      dump_ir_block_graph(irg, "-YYY");
2401
2402 #ifdef DEBUG_libfirm
2403         DEL_ARR_F(env.id_2_address);
2404 #endif
2405
2406         current_ir_graph = rem;
2407         return env.changed != 0;
2408 }  /* opt_ldst */
2409
2410 ir_graph_pass_t *opt_ldst_pass(const char *name)
2411 {
2412         return def_graph_pass_ret(name ? name : "ldst_df", opt_ldst);
2413 }  /* opt_ldst_pass */