libfirm/ir/opt/opt_ldst.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Dataflow driven Load/Store optimizations, uses some ideas from
23  *          VanDrunen's LEPRE
24  * @author  Michael Beck
25  * @version $Id$
26  */
27 #include "config.h"
28
29 #include "irnode_t.h"
30 #include "irflag_t.h"
31 #include "array_t.h"
32 #include "ircons.h"
33 #include "irdom.h"
34 #include "irgmod.h"
35 #include "irgwalk.h"
36 #include "irouts.h"
37 #include "irgraph.h"
38 #include "irgopt.h"
39 #include "iropt.h"
40 #include "iroptimize.h"
41 #include "irnodemap.h"
42 #include "raw_bitset.h"
43 #include "debug.h"
44 #include "error.h"
45
46 /* maximum number of output Proj's */
47 #define MAX_PROJ (pn_Load_max > pn_Store_max ? pn_Load_max : pn_Store_max)
48
49 /**
50  * Maps an address to a dense ID.
51  */
52 typedef struct address_entry_t {
53         unsigned id;          /**< The ID */
54 } address_entry;
55
56 /**
57  * Memop-flags.
58  */
59 enum memop_flags {
60         FLAG_KILL_ALL    = 1, /**< KILL all addresses */
61         FLAG_KILLED_NODE = 2, /**< this node was killed */
62         FLAG_EXCEPTION   = 4, /**< this node has exception flow */
63         FLAG_IGNORE      = 8, /**< ignore this node (volatile or other) */
64 };
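
/*
 * Example of how these flags are combined later in this pass: a volatile
 * Load or Store gets FLAG_IGNORE, an operation that may clobber every
 * tracked address (an ordinary Call or an unsupported op) gets
 * FLAG_KILL_ALL, and a Load that could be replaced by an already
 * available value gets FLAG_KILLED_NODE with its replacement recorded.
 */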
65
66 /**
67  * A value: This represents a value stored at a given address in
68  * memory. Do not confuse with values from value numbering.
69  */
70 typedef struct value_t value_t;
71 struct value_t {
72         ir_node  *address;    /**< the address of this value */
73         ir_node  *value;      /**< the value itself */
74         ir_mode  *mode;       /**< the mode of the value */
75         unsigned id;          /**< address id */
76 };
77
78 /**
79  * A memop describes a memory-related operation.
80  * These are Loads/Stores and all other ops that might modify
81  * memory (Calls, CopyB) or cause exceptions.
82  */
83 typedef struct memop_t memop_t;
84 struct memop_t {
85         value_t  value;      /**< the value of this memop: only defined for Load/Store */
86         ir_node  *node;      /**< the memory op itself */
87         ir_node  *mem;       /**< the memory FROM this node */
88         ir_node  *replace;   /**< the replacement node if this memop is replaced */
89         memop_t  *next;      /**< links to the next memory op in the block in forward order. */
90         memop_t  *prev;      /**< links to the previous memory op in the block in forward order. */
91         unsigned flags;      /**< memop flags */
92         ir_node  *projs[MAX_PROJ]; /**< Projs of this memory op */
93 };
94
95 /**
96  * Additional data for every basic block.
97  */
98 typedef struct block_t block_t;
99 struct block_t {
100         memop_t  *memop_forward;     /**< topologically sorted list of memory ops in this block */
101         memop_t  *memop_backward;    /**< last memop in the list */
102         unsigned *avail_out;         /**< out-set of available addresses */
103         memop_t  **id_2_memop_avail; /**< maps avail address ids to memops */
104         unsigned *anticL_in;         /**< in-set of anticipated Load addresses */
105         memop_t  **id_2_memop_antic; /**< maps anticipated address ids to memops */
106         ir_node  *block;             /**< the associated block */
107         block_t  *forward_next;      /**< next block entry for forward iteration */
108         block_t  *backward_next;     /**< next block entry for backward iteration */
109         memop_t  *avail;             /**< used locally for the avail map */
110         memop_t  **trans_results;    /**< used to cache translated nodes during the antic calculation. */
111 };
112
113 /**
114  * Metadata for this pass.
115  */
116 typedef struct ldst_env_t {
117         struct obstack  obst;              /**< obstack for temporary data */
118         ir_nodemap_t    adr_map;           /**< Maps addresses to address entries (dense ids). */
119         block_t         *forward;          /**< Inverse post-order list of all blocks Start->End */
120         block_t         *backward;         /**< Inverse post-order list of all blocks End->Start */
121         ir_node         *start_bl;         /**< start block of the current graph */
122         ir_node         *end_bl;           /**< end block of the current graph */
123         unsigned        *curr_set;         /**< current set of addresses */
124         memop_t         **curr_id_2_memop; /**< current map of address ids to memops */
125         unsigned        curr_adr_id;       /**< number for address mapping */
126         unsigned        n_mem_ops;         /**< number of memory operations (Loads/Stores) */
127         unsigned        rbs_size;          /**< size of all bitsets in bits */
128         int             max_cfg_preds;     /**< maximum number of block cfg predecessors */
129         int             changed;           /**< Flags for changed graph state */
130 #ifdef DEBUG_libfirm
131         ir_node         **id_2_address;    /**< maps an id to the used address */
132 #endif
133 } ldst_env;
134
135 /* the one and only environment */
136 static ldst_env env;
137
138 #ifdef DEBUG_libfirm
139
140 static firm_dbg_module_t *dbg;
141
142 /**
143  * Dumps the block list.
144  *
145  * @param env  the environment
146  */
147 static void dump_block_list(ldst_env *env) {
148         block_t *entry;
149         memop_t *op;
150         int     i;
151
152         for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
153                 DB((dbg, LEVEL_2, "%+F {", entry->block));
154
155                 i = 0;
156                 for (op = entry->memop_forward; op != NULL; op = op->next) {
157                         if (i == 0)
158                                 DB((dbg, LEVEL_2, "\n\t"));
159                         DB((dbg, LEVEL_2, "%+F", op->node));
160                         if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
161                                 DB((dbg, LEVEL_2, "X"));
162                         else if (op->flags & FLAG_KILL_ALL)
163                                 DB((dbg, LEVEL_2, "K"));
164                         DB((dbg, LEVEL_2, ", "));
165
166                         i = (i + 1) & 3;
167                 }
168                 DB((dbg, LEVEL_2, "\n}\n\n"));
169         }
170 }  /* dump_block_list */
171
172 /**
173  * Dumps the current set.
174  *
175  * @param bl   current block
176  * @param s    name of the set
177  */
178 static void dump_curr(block_t *bl, const char *s) {
179         unsigned end = env.rbs_size - 1;
180         unsigned pos;
181         int      i;
182
183         DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
184         i = 0;
185         for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
186                 memop_t *op = env.curr_id_2_memop[pos];
187
188                 if (i == 0) {
189                         DB((dbg, LEVEL_2, "\n\t"));
190                 }
191
192                 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
193                 i = (i + 1) & 3;
194         }
195         DB((dbg, LEVEL_2, "\n}\n"));
196 }  /* dump_curr */
197
198 #else
199 #define dump_block_list()
200 #define dump_curr(bl, s)
201 #endif /* DEBUG_libfirm */
202
203 /** Get the block entry for a block node */
204 static block_t *get_block_entry(const ir_node *block) {
205         assert(is_Block(block));
206
207         return get_irn_link(block);
208 }  /* get_block_entry */
209
210 /** Get the memop entry for a memory operation node */
211 static memop_t *get_irn_memop(const ir_node *irn) {
212         assert(! is_Block(irn));
213         return get_irn_link(irn);
214 }  /* get_irn_memop */
215
216 /**
217  * Walk over the memory edges from definition to users.
218  * This ensures that even operations without a memory output are found.
219  *
220  * @param irn   start node
221  * @param pre   pre walker function
222  * @param post  post walker function
223  * @param ctx   context parameter for the walker functions
224  */
225 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
226         int     i;
227         ir_mode *mode;
228
229         mark_irn_visited(irn);
230
231         if (pre)
232                 pre(irn, ctx);
233
234         mode = get_irn_mode(irn);
235         if (mode == mode_M) {
236                 /* every successor uses memory */
237                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
238                         ir_node *succ = get_irn_out(irn, i);
239
240                         if (! irn_visited(succ))
241                                 walk_memory(succ, pre, post, ctx);
242                 }
243         } else if (mode == mode_T) {
244                 /* only some Proj's use memory */
245                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
246                         ir_node *proj = get_irn_out(irn, i);
247
248                         if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
249                                 walk_memory(proj, pre, post, ctx);
250                 }
251         }
252         if (post)
253                 post(irn, ctx);
254 }  /* walk_memory */
255
256 /**
257  * Walks over all memory nodes of a graph.
258  *
259  * @param irg   a graph
260  * @param pre   pre walker function
261  * @param post  post walker function
262  * @param ctx   context parameter for the walker functions
263  */
264 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
265         inc_irg_visited(irg);
266
267         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
268
269         /*
270          * there are two possible sources for memory: initial_mem and nomem
271          * we ignore nomem as this should NOT change the memory
272          */
273         walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
274
275         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
276 }  /* walk_memory_irg */
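
/*
 * Illustrative usage (a sketch): the per-block memop lists are built by
 * starting the walk at the initial memory, for instance
 *
 *     walk_memory_irg(irg, collect_memops, NULL, NULL);
 *
 * Passing the collector (defined below) as the pre walker visits every
 * memory definition before its users, which gives the topological
 * forward order the per-block lists rely on.
 */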
277
278 /**
279  * Register an address and allocate a (dense, 0..n) ID for it.
280  *
281  * @param adr  the IR-node representing the address
282  *
283  * @return the allocated id
284  */
285 static unsigned register_address(ir_node *adr) {
286         address_entry *entry;
287
288         /* skip Confirms and Casts */
289 restart:
290         if (is_Confirm(adr)) {
291                 adr = get_Confirm_value(adr);
292                 goto restart;
293         }
294         if (is_Cast(adr)) {
295                 adr = get_Cast_op(adr);
296                 goto restart;
297         }
298
299         entry = ir_nodemap_get(&env.adr_map, adr);
300
301         if (entry == NULL) {
302                 /* new address */
303                 entry = obstack_alloc(&env.obst, sizeof(*entry));
304
305                 entry->id = env.curr_adr_id++;
306                 ir_nodemap_insert(&env.adr_map, adr, entry);
307
308                 DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
309 #ifdef DEBUG_libfirm
310                 ARR_APP1(ir_node *, env.id_2_address, adr);
311 #endif
312         }
313         return entry->id;
314 }  /* register_address */
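
/*
 * Example: for an address node p, register_address(p),
 * register_address(Cast(p)) and register_address(Confirm(p, ...)) all
 * return the same id, because Confirm and Cast wrappers are stripped
 * before the map lookup.  Ids are handed out densely (0, 1, 2, ...), so
 * they can serve directly as bit positions in the rbitsets used below.
 */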
315
316
317 /**
318  * Translate an address through a Phi node into a given predecessor
319  * block.
320  *
321  * @param address  the address
322  * @param block    the block
323  * @param pos      the position of the predecessor in block
324  */
325 static ir_node *phi_translate(ir_node *address, const ir_node *block, int pos) {
326         if (is_Phi(address) && get_nodes_block(address) == block)
327                 address = get_Phi_pred(address, pos);
328         return address;
329 }  /* phi_translate */
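
/*
 * Example: if block B has two control flow predecessors and contains
 * a = Phi(a0, a1), then phi_translate(a, B, 0) yields a0 and
 * phi_translate(a, B, 1) yields a1; an address that is not a Phi of B
 * is returned unchanged.
 */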
330
331 /**
332  * Get the effective block of an address in the pos'th predecessor
333  * of the given block.
334  *
335  * @param address  the address
336  * @param block    the block
337  * @param pos      the position of the predecessor in block
338  */
339 static ir_node *get_effective_block(ir_node *address, ir_node *block, int pos) {
340         address = phi_translate(address, block, pos);
341         return get_nodes_block(address);
342 }  /* get_effective_block */
343
344 /**
345  * Walker: allocate a block entry for every block
346  * and register all potential addresses.
347  */
348 static void prepare_blocks(ir_node *irn, void *ctx) {
349         (void)ctx;
350
351         if (is_Block(irn)) {
352                 block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
353                 int     n;
354
355                 entry->memop_forward    = NULL;
356                 entry->memop_backward   = NULL;
357                 entry->avail_out        = NULL;
358                 entry->id_2_memop_avail = NULL;
359                 entry->anticL_in        = NULL;
360                 entry->id_2_memop_antic = NULL;
361                 entry->block            = irn;
362                 entry->forward_next     = NULL;
363                 entry->backward_next    = NULL;
364                 entry->avail            = NULL;
365                 entry->trans_results    = NULL;
366                 set_irn_link(irn, entry);
367
368                 set_Block_phis(irn, NULL);
369
370                 /* use block marks to track unreachable blocks */
371                 set_Block_mark(irn, 0);
372
373                 n = get_Block_n_cfgpreds(irn);
374                 if (n > env.max_cfg_preds)
375                         env.max_cfg_preds = n;
376         } else {
377                 ir_mode *mode = get_irn_mode(irn);
378
379                 if (mode_is_reference(mode)) {
380                         /*
381                          * Register ALL possible addresses: this is overkill for now, but
382                          * simpler than doing it only for all possible translated addresses
383                          * (which would be sufficient at the moment).
384                          */
385                         (void)register_address(irn);
386                 }
387         }
388 }  /* prepare_blocks */
389
390 /**
391  * Post-Walker: link in all Phi's
392  */
393 static void link_phis(ir_node *irn, void *ctx) {
394         (void)ctx;
395
396         if (is_Phi(irn)) {
397                 ir_node *block = get_nodes_block(irn);
398                 add_Block_phi(block, irn);
399         }
400 }  /* link_phis */
401
402 /**
403  * Block walker: creates the inverse post-order list for the CFG.
404  */
405 static void inverse_post_order(ir_node *block, void *ctx) {
406         block_t *entry = get_block_entry(block);
407
408         (void)ctx;
409
410         /* mark that this block IS reachable from Start */
411         set_Block_mark(block, 1);
412
413         /* create the list in inverse order */
414         entry->forward_next = env.forward;
415         env.forward         = entry;
416
417         /* remember the first visited (last in list) entry, needed for later */
418         if (env.backward == NULL)
419                 env.backward = entry;
420 }  /* inverse_post_order */
421
422 /**
423  * Block walker: create backward links for the memops of a block.
424  */
425 static void collect_backward(ir_node *block, void *ctx) {
426         block_t *entry = get_block_entry(block);
427         memop_t *last, *op;
428
429         (void)ctx;
430
431         /*
432          * Do NOT link in the end block yet. We want it to be
433          * the first in the list. This is NOT guaranteed by the walker
434          * if we have endless loops.
435          */
436         if (block != env.end_bl) {
437                 entry->backward_next = env.backward;
438
439                 /* create the list in inverse order */
440                 env.backward = entry;
441         }
442
443         /* create backward links for all memory ops */
444         last = NULL;
445         for (op = entry->memop_forward; op != NULL; op = op->next) {
446                 op->prev = last;
447                 last     = op;
448         }
449         entry->memop_backward = last;
450 }  /* collect_backward */
451
452 /**
453  * Allocate a memop.
454  *
455  * @param irn  the IR-node representing the memop or NULL
456  *             if this is a translated (virtual) memop
457  *
458  * @return the allocated memop
459  */
460 static memop_t *alloc_memop(ir_node *irn) {
461         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
462
463         m->value.address = NULL;
464         m->value.value   = NULL;
465         m->value.mode    = NULL;
466
467         m->node          = irn;
468         m->mem           = NULL;
469         m->replace       = NULL;
470         m->next          = NULL;
471         m->flags         = 0;
472
473         memset(m->projs, 0, sizeof(m->projs));
474
475         if (irn != NULL)
476                 set_irn_link(irn, m);
477         return m;
478 }  /* alloc_memop */
479
480 /**
481  * Create a memop for a Phi-replacement.
482  *
483  * @param op   the memop to clone
484  * @param phi  the Phi-node representing the new value
485  */
486 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
487         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
488
489         m->value         = op->value;
490         m->value.value   = phi;
491
492         m->node          = phi;
493         m->replace       = NULL;
494         m->next          = NULL;
495         m->flags         = 0;
496
497         set_irn_link(phi, m);
498         return m;
499 }  /* clone_memop_phi */
500
501 /**
502  * Return the memory properties of a call node.
503  *
504  * @param call  the call node
505  *
506  * @return a bitset of mtp_property_const and mtp_property_pure
507  */
508 static unsigned get_Call_memory_properties(ir_node *call) {
509         ir_type *call_tp = get_Call_type(call);
510         unsigned prop = get_method_additional_properties(call_tp);
511
512         /* check first the call type */
513         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
514                 /* try the called entity */
515                 ir_node *ptr = get_Call_ptr(call);
516
517                 if (is_Global(ptr)) {
518                         ir_entity *ent = get_Global_entity(ptr);
519
520                         prop = get_entity_additional_properties(ent);
521                 }
522         }
523         return prop & (mtp_property_const|mtp_property_pure);
524 }  /* get_Call_memory_properties */
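
/*
 * Example: a Call whose type (or statically known callee entity) carries
 * mtp_property_const neither reads nor writes memory; with
 * mtp_property_pure it may read but not write.  For every other call 0
 * is returned and the caller has to assume arbitrary memory effects.
 */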
525
526 /**
527  * Returns an entity if the address ptr points to a constant one.
528  *
529  * @param ptr  the address
530  *
531  * @return an entity or NULL
532  */
533 static ir_entity *find_constant_entity(ir_node *ptr) {
534         for (;;) {
535                 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
536                         return get_SymConst_entity(ptr);
537                 } else if (is_Sel(ptr)) {
538                         ir_entity *ent = get_Sel_entity(ptr);
539                         ir_type   *tp  = get_entity_owner(ent);
540
541                         /* Do not fiddle with polymorphism. */
542                         if (is_Class_type(get_entity_owner(ent)) &&
543                                 ((get_entity_n_overwrites(ent)    != 0) ||
544                                 (get_entity_n_overwrittenby(ent) != 0)   ) )
545                                 return NULL;
546
547                         if (is_Array_type(tp)) {
548                                 /* check bounds */
549                                 int i, n;
550
551                                 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
552                                         ir_node *bound;
553                                         tarval *tlower, *tupper;
554                                         ir_node *index = get_Sel_index(ptr, i);
555                                         tarval *tv     = computed_value(index);
556
557                                         /* check if the index is constant */
558                                         if (tv == tarval_bad)
559                                                 return NULL;
560
561                                         bound  = get_array_lower_bound(tp, i);
562                                         tlower = computed_value(bound);
563                                         bound  = get_array_upper_bound(tp, i);
564                                         tupper = computed_value(bound);
565
566                                         if (tlower == tarval_bad || tupper == tarval_bad)
567                                                 return NULL;
568
569                                         if (tarval_cmp(tv, tlower) & pn_Cmp_Lt)
570                                                 return NULL;
571                                         if (tarval_cmp(tupper, tv) & pn_Cmp_Lt)
572                                                 return NULL;
573
574                                         /* ok, bounds check finished */
575                                 }
576                         }
577
578                         if (variability_constant == get_entity_variability(ent))
579                                 return ent;
580
581                         /* try next */
582                         ptr = get_Sel_ptr(ptr);
583                 } else if (is_Add(ptr)) {
584                         ir_node *l = get_Add_left(ptr);
585                         ir_node *r = get_Add_right(ptr);
586
587                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
588                                 ptr = l;
589                         else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
590                                 ptr = r;
591                         else
592                                 return NULL;
593
594                         /* for now, we support only one addition, reassoc should fold all others */
595                         if (! is_SymConst(ptr) && !is_Sel(ptr))
596                                 return NULL;
597                 } else if (is_Sub(ptr)) {
598                         ir_node *l = get_Sub_left(ptr);
599                         ir_node *r = get_Sub_right(ptr);
600
601                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
602                                 ptr = l;
603                         else
604                                 return NULL;
605                         /* for now, we support only one subtraction, reassoc should fold all others */
606                         if (! is_SymConst(ptr) && !is_Sel(ptr))
607                                 return NULL;
608                 } else
609                         return NULL;
610         }
611 }  /* find_constant_entity */
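
/*
 * Example: for an address such as Sel(SymConst &A, 2), where the index 2
 * is constant and within the array bounds, the accessed entity is
 * returned as long as it has constant variability; a non-constant or
 * out-of-bounds index, or a non-constant entity, yields NULL.
 */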
612
613 /**
614  * Return the Selection index of Sel node n at dimension dim.
615  */
616 static long get_Sel_array_index_long(ir_node *n, int dim) {
617         ir_node *index = get_Sel_index(n, dim);
618         assert(is_Const(index));
619         return get_tarval_long(get_Const_tarval(index));
620 }  /* get_Sel_array_index_long */
621
622 /**
623  * Returns the accessed component graph path for a
624  * node computing an address.
625  *
626  * @param ptr    the node computing the address
627  * @param depth  current depth in steps upward from the root
628  *               of the address
629  */
630 static compound_graph_path *rec_get_accessed_path(ir_node *ptr, int depth) {
631         compound_graph_path *res = NULL;
632         ir_entity           *root, *field, *ent;
633         int                 path_len, pos, idx;
634         tarval              *tv;
635         ir_type             *tp;
636
637         if (is_SymConst(ptr)) {
638                 /* a SymConst. If the depth is 0, this is an access to a global
639                  * entity and we don't need a component path, else we know
640                  * at least its length.
641                  */
642                 assert(get_SymConst_kind(ptr) == symconst_addr_ent);
643                 root = get_SymConst_entity(ptr);
644                 res = (depth == 0) ? NULL : new_compound_graph_path(get_entity_type(root), depth);
645         } else if (is_Sel(ptr)) {
646                 /* it's a Sel, go up until we find the root */
647                 res = rec_get_accessed_path(get_Sel_ptr(ptr), depth+1);
648                 if (res == NULL)
649                         return NULL;
650
651                 /* fill up the step in the path at the current position */
652                 field    = get_Sel_entity(ptr);
653                 path_len = get_compound_graph_path_length(res);
654                 pos      = path_len - depth - 1;
655                 set_compound_graph_path_node(res, pos, field);
656
657                 if (is_Array_type(get_entity_owner(field))) {
658                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
659                         set_compound_graph_path_array_index(res, pos, get_Sel_array_index_long(ptr, 0));
660                 }
661         } else if (is_Add(ptr)) {
662                 ir_node *l    = get_Add_left(ptr);
663                 ir_node *r    = get_Add_right(ptr);
664                 ir_mode *mode = get_irn_mode(ptr);
665                 tarval  *tmp;
666
667                 if (is_Const(r) && get_irn_mode(l) == mode) {
668                         ptr = l;
669                         tv  = get_Const_tarval(r);
670                 } else {
671                         ptr = r;
672                         tv  = get_Const_tarval(l);
673                 }
674 ptr_arith:
675                 mode = get_tarval_mode(tv);
676                 tmp  = tv;
677
678                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
679                 if (is_Sel(ptr)) {
680                         field = get_Sel_entity(ptr);
681                 } else {
682                         field = get_SymConst_entity(ptr);
683                 }
684                 idx = 0;
685                 for (ent = field;;) {
686                         unsigned size;
687                         tarval   *sz, *tv_index, *tlower, *tupper;
688                         ir_node  *bound;
689
690                         tp = get_entity_type(ent);
691                         if (! is_Array_type(tp))
692                                 break;
693                         ent = get_array_element_entity(tp);
694                         size = get_type_size_bytes(get_entity_type(ent));
695                         sz   = new_tarval_from_long(size, mode);
696
697                         tv_index = tarval_div(tmp, sz);
698                         tmp      = tarval_mod(tmp, sz);
699
700                         if (tv_index == tarval_bad || tmp == tarval_bad)
701                                 return NULL;
702
703                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
704                         bound  = get_array_lower_bound(tp, 0);
705                         tlower = computed_value(bound);
706                         bound  = get_array_upper_bound(tp, 0);
707                         tupper = computed_value(bound);
708
709                         if (tlower == tarval_bad || tupper == tarval_bad)
710                                 return NULL;
711
712                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
713                                 return NULL;
714                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
715                                 return NULL;
716
717                         /* ok, bounds check finished */
718                         ++idx;
719                 }
720                 if (! tarval_is_null(tmp)) {
721                         /* access to some struct/union member */
722                         return NULL;
723                 }
724
725                 /* should be at least ONE array */
726                 if (idx == 0)
727                         return NULL;
728
729                 res = rec_get_accessed_path(ptr, depth + idx);
730                 if (res == NULL)
731                         return NULL;
732
733                 path_len = get_compound_graph_path_length(res);
734                 pos      = path_len - depth - idx;
735
736                 for (ent = field;;) {
737                         unsigned size;
738                         tarval   *sz, *tv_index;
739                         long     index;
740
741                         tp = get_entity_type(ent);
742                         if (! is_Array_type(tp))
743                                 break;
744                         ent = get_array_element_entity(tp);
745                         set_compound_graph_path_node(res, pos, ent);
746
747                         size = get_type_size_bytes(get_entity_type(ent));
748                         sz   = new_tarval_from_long(size, mode);
749
750                         tv_index = tarval_div(tv, sz);
751                         tv       = tarval_mod(tv, sz);
752
753                         /* worked above, should work again */
754                         assert(tv_index != tarval_bad && tv != tarval_bad);
755
756                         /* bounds already checked above */
757                         index = get_tarval_long(tv_index);
758                         set_compound_graph_path_array_index(res, pos, index);
759                         ++pos;
760                 }
761         } else if (is_Sub(ptr)) {
762                 ir_node *l = get_Sub_left(ptr);
763                 ir_node *r = get_Sub_right(ptr);
764
765                 ptr = l;
766                 tv  = get_Const_tarval(r);
767                 tv  = tarval_neg(tv);
768                 goto ptr_arith;
769         }
770         return res;
771 }  /* rec_get_accessed_path */
772
773 /**
774  * Returns an access path or NULL.  The access path is only
775  * valid if the graph is in phase_high and _no_ address computation is used.
776  */
777 static compound_graph_path *get_accessed_path(ir_node *ptr) {
778         compound_graph_path *gr = rec_get_accessed_path(ptr, 0);
779         return gr;
780 }  /* get_accessed_path */
781
782 typedef struct path_entry {
783         ir_entity         *ent;
784         struct path_entry *next;
785         long              index;
786 } path_entry;
787
788 static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next) {
789         path_entry       entry, *p;
790         ir_entity        *ent, *field;
791         ir_initializer_t *initializer;
792         tarval           *tv;
793         ir_type          *tp;
794         unsigned         n;
795
796         entry.next = next;
797         if (is_SymConst(ptr)) {
798                 /* found the root */
799                 ent         = get_SymConst_entity(ptr);
800                 initializer = get_entity_initializer(ent);
801                 for (p = next; p != NULL;) {
802                         if (initializer->kind != IR_INITIALIZER_COMPOUND)
803                                 return NULL;
804                         n  = get_initializer_compound_n_entries(initializer);
805                         tp = get_entity_type(ent);
806
807                         if (is_Array_type(tp)) {
808                                 ent = get_array_element_entity(tp);
809                                 if (ent != p->ent) {
810                                         /* a missing [0] */
811                                         if (0 >= n)
812                                                 return NULL;
813                                         initializer = get_initializer_compound_value(initializer, 0);
814                                         continue;
815                                 }
816                         }
817                         if (p->index >= (int) n)
818                                 return NULL;
819                         initializer = get_initializer_compound_value(initializer, p->index);
820
821                         ent = p->ent;
822                         p   = p->next;
823                 }
824                 tp = get_entity_type(ent);
825                 while (is_Array_type(tp)) {
826                         ent = get_array_element_entity(tp);
827                         tp = get_entity_type(ent);
828                         /* a missing [0] */
829                         n  = get_initializer_compound_n_entries(initializer);
830                         if (0 >= n)
831                                 return NULL;
832                         initializer = get_initializer_compound_value(initializer, 0);
833                 }
834
835                 switch (initializer->kind) {
836                 case IR_INITIALIZER_CONST:
837                         return get_initializer_const_value(initializer);
838                 case IR_INITIALIZER_TARVAL:
839                 case IR_INITIALIZER_NULL:
840                 default:
841                         return NULL;
842                 }
843         } else if (is_Sel(ptr)) {
844                 entry.ent = field = get_Sel_entity(ptr);
845                 tp = get_entity_owner(field);
846                 if (is_Array_type(tp)) {
847                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
848                         entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
849                 } else {
850                         int i, n_members = get_compound_n_members(tp);
851                         for (i = 0; i < n_members; ++i) {
852                                 if (get_compound_member(tp, i) == field)
853                                         break;
854                         }
855                         if (i >= n_members) {
856                                 /* not found: should NOT happen */
857                                 return NULL;
858                         }
859                         entry.index = i;
860                 }
861                 return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
862         } else if (is_Add(ptr)) {
863                 ir_node  *l = get_Add_left(ptr);
864                 ir_node  *r = get_Add_right(ptr);
865                 ir_mode  *mode;
866                 unsigned pos;
867
868                 if (is_Const(r)) {
869                         ptr = l;
870                         tv  = get_Const_tarval(r);
871                 } else {
872                         ptr = r;
873                         tv  = get_Const_tarval(l);
874                 }
875 ptr_arith:
876                 mode = get_tarval_mode(tv);
877
878                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
879                 if (is_Sel(ptr)) {
880                         field = get_Sel_entity(ptr);
881                 } else {
882                         field = get_SymConst_entity(ptr);
883                 }
884
885                 /* count needed entries */
886                 pos = 0;
887                 for (ent = field;;) {
888                         tp = get_entity_type(ent);
889                         if (! is_Array_type(tp))
890                                 break;
891                         ent = get_array_element_entity(tp);
892                         ++pos;
893                 }
894                 /* should be at least ONE entry */
895                 if (pos == 0)
896                         return NULL;
897
898                 /* allocate the right number of entries */
899                 NEW_ARR_A(path_entry, p, pos);
900
901                 /* fill them up */
902                 pos = 0;
903                 for (ent = field;;) {
904                         unsigned size;
905                         tarval   *sz, *tv_index, *tlower, *tupper;
906                         long     index;
907                         ir_node  *bound;
908
909                         tp = get_entity_type(ent);
910                         if (! is_Array_type(tp))
911                                 break;
912                         ent = get_array_element_entity(tp);
913                         p[pos].ent  = ent;
914                         p[pos].next = &p[pos + 1];
915
916                         size = get_type_size_bytes(get_entity_type(ent));
917                         sz   = new_tarval_from_long(size, mode);
918
919                         tv_index = tarval_div(tv, sz);
920                         tv       = tarval_mod(tv, sz);
921
922                         if (tv_index == tarval_bad || tv == tarval_bad)
923                                 return NULL;
924
925                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
926                         bound  = get_array_lower_bound(tp, 0);
927                         tlower = computed_value(bound);
928                         bound  = get_array_upper_bound(tp, 0);
929                         tupper = computed_value(bound);
930
931                         if (tlower == tarval_bad || tupper == tarval_bad)
932                                 return NULL;
933
934                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
935                                 return NULL;
936                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
937                                 return NULL;
938
939                         /* ok, bounds check finished */
940                         index = get_tarval_long(tv_index);
941                         p[pos].index = index;
942                         ++pos;
943                 }
944                 if (! tarval_is_null(tv)) {
945                         /* hmm, wrong access */
946                         return NULL;
947                 }
948                 p[pos - 1].next = next;
949                 return rec_find_compound_ent_value(ptr, p);
950         } else if (is_Sub(ptr)) {
951                 ir_node *l = get_Sub_left(ptr);
952                 ir_node *r = get_Sub_right(ptr);
953
954                 ptr = l;
955                 tv  = get_Const_tarval(r);
956                 tv  = tarval_neg(tv);
957                 goto ptr_arith;
958         }
959         return NULL;
960 }  /* rec_find_compound_ent_value */
961
962 static ir_node *find_compound_ent_value(ir_node *ptr) {
963         return rec_find_compound_ent_value(ptr, NULL);
964 }  /* find_compound_ent_value */
965
966 /**
967  * Mark a Load memop to be replaced by a definition
968  *
969  * @param op  the Load memop
970  */
971 static void mark_replace_load(memop_t *op, ir_node *def) {
972         op->replace = def;
973         op->flags |= FLAG_KILLED_NODE;
974         env.changed = 1;
975 }  /* mark_replace_load */
976
977 /**
978  * Mark a Store memop to be removed.
979  *
980  * @param op  the Store memop
981  */
982 static void mark_remove_store(memop_t *op) {
983         op->flags |= FLAG_KILLED_NODE;
984         env.changed = 1;
985 }  /* mark_remove_store */
986
987 /**
988  * Update a memop for a Load.
989  *
990  * @param m  the memop
991  */
992 static void update_Load_memop(memop_t *m) {
993         int       i;
994         ir_node   *load = m->node;
995         ir_node   *ptr;
996         ir_entity *ent;
997
998         if (get_Load_volatility(load) == volatility_is_volatile)
999                 m->flags |= FLAG_IGNORE;
1000
1001         ptr = get_Load_ptr(load);
1002
1003         m->value.address = ptr;
1004
1005         for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
1006                 ir_node *proj = get_irn_out(load, i);
1007                 long    pn;
1008
1009                 /* beware of keep edges */
1010                 if (is_End(proj))
1011                         continue;
1012
1013                 pn = get_Proj_proj(proj);
1014                 m->projs[pn] = proj;
1015                 switch (pn) {
1016                 case pn_Load_res:
1017                         m->value.value = proj;
1018                         m->value.mode  = get_irn_mode(proj);
1019                         break;
1020                 case pn_Load_X_except:
1021                         m->flags |= FLAG_EXCEPTION;
1022                         break;
1023                 case pn_Load_M:
1024                         m->mem = proj;
1025                         break;
1026                 case pn_Load_X_regular:
1027                         break;
1028                 default:
1029                         panic("Unsupported Proj from Load %+F", proj);
1030                 }
1031         }
1032
1033         /* check if we can determine the entity that will be loaded */
1034         ent = find_constant_entity(ptr);
1035
1036         if (ent != NULL                                     &&
1037                 allocation_static == get_entity_allocation(ent) &&
1038                 visibility_external_allocated != get_entity_visibility(ent)) {
1039                 /* a static allocation that is not external: there should be NO exception
1040                  * when loading even if we cannot replace the load itself. */
1041                 ir_node *value = NULL;
1042
1043                 /* no exception, clear the m fields as it might be checked later again */
1044                 if (m->projs[pn_Load_X_except]) {
1045                         exchange(m->projs[pn_Load_X_except], new_Bad());
1046                         m->projs[pn_Load_X_except] = NULL;
1047                         m->flags &= ~FLAG_EXCEPTION;
1048                         env.changed = 1;
1049                 }
1050                 if (m->projs[pn_Load_X_regular]) {
1051                         exchange(m->projs[pn_Load_X_regular], new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1052                         m->projs[pn_Load_X_regular] = NULL;
1053                         env.changed = 1;
1054                 }
1055
1056                 if (variability_constant == get_entity_variability(ent)) {
1057                         if (is_atomic_entity(ent)) {
1058                                 /* Might not be atomic after lowering of Sels.  In this case we
1059                                  * could also load, but it's more complicated. */
1060                                 /* simpler case: we load the content of a constant value:
1061                                  * replace it by the constant itself */
1062                                 value = get_atomic_ent_value(ent);
1063                         } else if (ent->has_initializer) {
1064                                 /* new style initializer */
1065                                 value = find_compound_ent_value(ptr);
1066                         } else {
1067                                 /* old style initializer */
1068                                 compound_graph_path *path = get_accessed_path(ptr);
1069
1070                                 if (path != NULL) {
1071                                         assert(is_proper_compound_graph_path(path, get_compound_graph_path_length(path)-1));
1072
1073                                         value = get_compound_ent_value_by_path(ent, path);
1074                                         DB((dbg, LEVEL_1, "  Constant access at %F%F resulted in %+F\n", ent, path, value));
1075                                         free_compound_graph_path(path);
1076                                 }
1077                         }
1078                         if (value != NULL)
1079                                 value = can_replace_load_by_const(load, value);
1080                 }
1081
1082                 if (value != NULL) {
1083                         /* we completely replace the load by this value */
1084                         DB((dbg, LEVEL_1, "Replacing Load %+F by constant %+F\n", m->node, value));
1085                         mark_replace_load(m, value);
1086                         return;
1087                 }
1088         }
1089
1090         if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
1091                 /* only create an address if this node is NOT killed immediately or ignored */
1092                 m->value.id = register_address(ptr);
1093                 ++env.n_mem_ops;
1094         } else {
1095                 /* no user, KILL it */
1096                 mark_replace_load(m, NULL);
1097         }
1098 }  /* update_Load_memop */
1099
1100 /**
1101  * Update a memop for a Store.
1102  *
1103  * @param m  the memop
1104  */
1105 static void update_Store_memop(memop_t *m) {
1106         int     i;
1107         ir_node *store = m->node;
1108         ir_node *adr   = get_Store_ptr(store);
1109
1110         if (get_Store_volatility(store) == volatility_is_volatile) {
1111                 m->flags |= FLAG_IGNORE;
1112         } else {
1113                 /* only create an address if this node is NOT ignored */
1114                 m->value.id = register_address(adr);
1115                 ++env.n_mem_ops;
1116         }
1117
1118         m->value.address = adr;
1119
1120         for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1121                 ir_node *proj = get_irn_out(store, i);
1122                 long    pn;
1123
1124                 /* beware of keep edges */
1125                 if (is_End(proj))
1126                         continue;
1127
1128                 pn = get_Proj_proj(proj);
1129                 m->projs[pn] = proj;
1130                 switch (pn) {
1131                 case pn_Store_X_except:
1132                         m->flags |= FLAG_EXCEPTION;
1133                         break;
1134                 case pn_Store_M:
1135                         m->mem = proj;
1136                         break;
1137                 case pn_Store_X_regular:
1138                         break;
1139                 default:
1140                         panic("Unsupported Proj from Store %+F", proj);
1141                 }
1142         }
1143         m->value.value = get_Store_value(store);
1144         m->value.mode  = get_irn_mode(m->value.value);
1145 }  /* update_Store_memop */
1146
1147 /**
1148  * Update a memop for a Call.
1149  *
1150  * @param m  the memop
1151  */
1152 static void update_Call_memop(memop_t *m) {
1153         ir_node  *call = m->node;
1154         unsigned prop  = get_Call_memory_properties(call);
1155         int      i;
1156
1157         if (prop & mtp_property_const) {
1158                 /* A constant call does NOT use memory at all, we
1159                    can kick it from the list. */
1160         } else if (prop & mtp_property_pure) {
1161                 /* pure calls READ memory */
1162                 m->flags = 0;
1163         } else
1164                 m->flags = FLAG_KILL_ALL;
1165
1166         for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
1167                 ir_node *proj = get_irn_out(call, i);
1168
1169                 /* beware of keep edges */
1170                 if (is_End(proj))
1171                         continue;
1172
1173                 switch (get_Proj_proj(proj)) {
1174                 case pn_Call_X_except:
1175                         m->flags |= FLAG_EXCEPTION;
1176                         break;
1177                 case pn_Call_M_regular:
1178                         m->mem = proj;
1179                         break;
1180                 }
1181         }
1182 }  /* update_Call_memop */
1183
1184 /**
1185  * Update a memop for a Div/Mod/Quot/DivMod.
1186  *
1187  * @param m  the memop
1188  */
1189 static void update_DivOp_memop(memop_t *m) {
1190         ir_node *div = m->node;
1191         int     i;
1192
1193         for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
1194                 ir_node *proj = get_irn_out(div, i);
1195
1196                 /* beware of keep edges */
1197                 if (is_End(proj))
1198                         continue;
1199
1200                 switch (get_Proj_proj(proj)) {
1201                 case pn_Generic_X_except:
1202                         m->flags |= FLAG_EXCEPTION;
1203                         break;
1204                 case pn_Generic_M_regular:
1205                         m->mem = proj;
1206                         break;
1207                 }
1208         }
1209 }  /* update_DivOp_memop */
1210
1211 /**
1212  * Update a memop for a Phi.
1213  *
1214  * @param m  the memop
1215  */
1216 static void update_Phi_memop(memop_t *m) {
1217         /* the Phi is its own mem */
1218         m->mem = m->node;
1219 }  /* update_Phi_memop */
1220
1221 /**
1222  * Memory walker: collect all memory ops and build topological lists.
1223  */
1224 static void collect_memops(ir_node *irn, void *ctx) {
1225         memop_t  *op;
1226         ir_node  *block;
1227         block_t  *entry;
1228
1229         (void) ctx;
1230         if (is_Proj(irn)) {
1231                 /* we can safely ignore ProjM's except the initial memory */
1232                 if (irn != get_irg_initial_mem(current_ir_graph))
1233                         return;
1234         }
1235
1236         op    = alloc_memop(irn);
1237         block = get_nodes_block(irn);
1238         entry = get_block_entry(block);
1239
1240         if (is_Phi(irn)) {
1241                 update_Phi_memop(op);
1242                 /* Phis must always be placed first */
1243                 op->next = entry->memop_forward;
1244                 entry->memop_forward = op;
1245                 if (entry->memop_backward == NULL)
1246                         entry->memop_backward = op;
1247         } else {
1248                 switch (get_irn_opcode(irn)) {
1249                 case iro_Load:
1250                         update_Load_memop(op);
1251                         break;
1252                 case iro_Store:
1253                         update_Store_memop(op);
1254                         break;
1255                 case iro_Call:
1256                         update_Call_memop(op);
1257                         break;
1258                 case iro_Sync:
1259                 case iro_Pin:
1260                         op->mem = irn;
1261                         break;
1262                 case iro_Proj:
1263                         /* initial memory */
1264                         op->mem = irn;
1265                         break;
1266                 case iro_Return:
1267                 case iro_End:
1268                         /* we can ignore those, we only need them to find the memory edge */
1269                         break;
1270                 case iro_Div:
1271                 case iro_DivMod:
1272                 case iro_Quot:
1273                 case iro_Mod:
1274                         update_DivOp_memop(op);
1275                         break;
1276
1277                 case iro_Builtin:
1278                         /* TODO: handle some builtins */
1279                 default:
1280                         /* unsupported operation */
1281                         op->flags = FLAG_KILL_ALL;
1282                 }
1283
1284
1285                 /* all other should be placed last */
1286                 if (entry->memop_backward == NULL) {
1287                         entry->memop_forward = entry->memop_backward = op;
1288                 } else {
1289                         entry->memop_backward->next = op;
1290                         entry->memop_backward       = op;
1291                 }
1292         }
1293 }  /* collect_memops */
1294
1295 /**
1296  * Find an address in the current set.
1297  *
1298  * @param value  the value to be searched for
1299  *
1300  * @return a memop for the value or NULL if the value does
1301  *         not exist in the set or cannot be converted into
1302  *         the requested mode
1303  */
1304 static memop_t *find_address(const value_t *value) {
1305         if (rbitset_is_set(env.curr_set, value->id)) {
1306                 memop_t *res = env.curr_id_2_memop[value->id];
1307
1308                 if (res->value.mode == value->mode)
1309                         return res;
1310                 /* allow hidden casts */
1311                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1312                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1313                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1314                         return res;
1315         }
1316         return NULL;
1317 }  /* find_address */
1318
1319 /**
1320  * Find an address in the avail_out set.
1321  *
1322  * @param bl     the block
1323  * @param value  the value to be searched for
1324  */
1325 static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
1326         if (rbitset_is_set(bl->avail_out, value->id)) {
1327                 memop_t *res = bl->id_2_memop_avail[value->id];
1328
1329                 if (res->value.mode == value->mode)
1330                         return res;
1331                 /* allow hidden casts */
1332                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1333                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1334                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1335                         return res;
1336         }
1337         return NULL;
1338 }  /* find_address_avail */
1339
1340 /**
1341  * Kill all addresses from the current set.
1342  */
1343 static void kill_all(void) {
1344         rbitset_clear_all(env.curr_set, env.rbs_size);
1345
1346         /* set a sentinel bit: rbitset_next() always finds it, so scans over this set terminate at rbs_size - 1 */
1347         rbitset_set(env.curr_set, env.rbs_size - 1);
1348 }  /* kill_all */
1349
1350 /**
1351  * Kill all memops from the current set that are not alias-free with respect to a Store value.
1352  *
1353  * @param value  the Store value
1354  */
1355 static void kill_memops(const value_t *value) {
1356         unsigned end = env.rbs_size - 1;
1357         unsigned pos;
1358
1359         for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1360                 memop_t *op = env.curr_id_2_memop[pos];
1361
1362                 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1363                                                           op->value.address, op->value.mode)) {
1364                         rbitset_clear(env.curr_set, pos);
1365                         env.curr_id_2_memop[pos] = NULL;
1366                         DB((dbg, LEVEL_2, "KILLING %+F because of possible alias address %+F\n", op->node, value->address));
1367                 }
1368         }
1369 }  /* kill_memops */
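
/*
 * Example: for a Store to an address p, every value currently tracked at
 * an address that may alias p (anything other than ir_no_alias from
 * get_alias_relation()) is removed from the set; only values at provably
 * disjoint addresses survive.
 */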
1370
1371 /**
1372  * Add the value of a memop to the current set.
1373  *
1374  * @param op  the memory op
1375  */
1376 static void add_memop(memop_t *op) {
1377         rbitset_set(env.curr_set, op->value.id);
1378         env.curr_id_2_memop[op->value.id] = op;
1379 }  /* add_memop */
1380
1381 /**
1382  * Add the value of a memop to the avail_out set.
1383  *
1384  * @param bl  the block
1385  * @param op  the memory op
1386  */
1387 static void add_memop_avail(block_t *bl, memop_t *op) {
1388         rbitset_set(bl->avail_out, op->value.id);
1389         bl->id_2_memop_avail[op->value.id] = op;
1390 }  /* add_memop_avail */
1391
1392 /**
1393  * Check, if we can convert a value of one mode to another mode
1394  * without changing the representation of bits.
1395  *
1396  * @param from  the original mode
1397  * @param to    the destination mode
1398  */
1399 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
1400         if (get_mode_arithmetic(from) == irma_twos_complement &&
1401             get_mode_arithmetic(to) == irma_twos_complement &&
1402             get_mode_size_bits(from) == get_mode_size_bits(to))
1403                 return 1;
1404         return 0;
1405 }  /* can_convert_to */
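
/*
 * Example: a 32-bit signed and a 32-bit unsigned integer mode are both
 * two's complement and have the same width, so a value can be reused
 * across them without changing a single bit; an integer/float pair of
 * the same size cannot, because the float mode is not two's complement.
 */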
1406
1407 /**
1408  * Add a Conv to the requested mode if needed.
1409  *
1410  * @param irn   the IR-node to convert
1411  * @param mode  the destination mode
1412  *
1413  * @return the possible converted node or NULL
1414  *         if the conversion is not possible
1415  */
1416 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
1417         ir_mode *other = get_irn_mode(irn);
1418         if (other != mode) {
1419                 /* different modes: check if conversion is possible without changing the bits */
1420                 if (can_convert_to(other, mode)) {
1421                         ir_node *block = get_nodes_block(irn);
1422                         return new_r_Conv(current_ir_graph, block, irn, mode);
1423                 }
1424                 /* otherwise not possible ... yet */
1425                 return NULL;
1426         }
1427         return irn;
1428 }  /* conv_to */
1429
1430 /**
1431  * Update the address of a value if this address was a Load result
1432  * and that Load has been killed.
1433  *
1434  * @param value  the value whose address is updated
1435  */
1436 static void update_address(value_t *value) {
1437         if (is_Proj(value->address)) {
1438                 ir_node *load = get_Proj_pred(value->address);
1439
1440                 if (is_Load(load)) {
1441                         const memop_t *op = get_irn_memop(load);
1442
1443                         if (op->flags & FLAG_KILLED_NODE)
1444                                 value->address = op->replace;
1445                 }
1446         }
1447 }  /* update_address */
1448
1449 /**
1450  * Do forward dataflow analysis on the given block and calculate its
1451  * GEN and KILL sets in the current (avail) set.
1452  *
1453  * @param bl  the block
1454  */
1455 static void calc_gen_kill_avail(block_t *bl) {
1456         memop_t *op;
1457         ir_node *def;
1458
1459         for (op = bl->memop_forward; op != NULL; op = op->next) {
1460                 switch (get_irn_opcode(op->node)) {
1461                 case iro_Phi:
1462                         /* meet */
1463                         break;
1464                 case iro_Sync:
1465                         /* join */
1466                         break;
1467                 case iro_Load:
1468                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1469                                 /* do we have this already? */
1470                                 memop_t *other;
1471
1472                                 update_address(&op->value);
1473                                 other = find_address(&op->value);
1474                                 if (other != NULL && other != op) {
1475                                         def = conv_to(other->value.value, op->value.mode);
1476                                         if (def != NULL) {
1477 #ifdef DEBUG_libfirm
1478                                                 if (is_Store(other->node)) {
1479                                                         /* RAW */
1480                                                         DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
1481                                                 } else {
1482                                                         /* RAR */
1483                                                         DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
1484                                                 }
1485 #endif
1486                                                 mark_replace_load(op, def);
1487                                                 /* do NOT change the memop table */
1488                                                 continue;
1489                                         }
1490                                 }
1491                                 /* add this value */
1492                                 add_memop(op);
1493                         }
1494                         break;
1495                 case iro_Store:
1496                         if (! (op->flags & FLAG_KILLED_NODE)) {
1497                                 /* do we have this Store already? */
1498                                 memop_t *other;
1499
1500                                 update_address(&op->value);
1501                                 other = find_address(&op->value);
1502                                 if (other != NULL) {
1503                                         if (is_Store(other->node)) {
1504                                                 if (op != other && !(other->flags & FLAG_IGNORE) &&
1505                                                     get_nodes_block(other->node) == get_nodes_block(op->node)) {
1506                                                         /*
1507                                                          * On a WAW in the same block we can kick out the first Store.
1508                                                          * This is a shortcut: we know that the second Store will be
1509                                                          * anticipated then in any case.
1510                                                          */
1511                                                         DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", other->node, op->node));
1512                                                         mark_remove_store(other);
1513                                                         /* FIXME: a Load might get freed due to this killed Store */
1514                                                 }
1515                                         } else if (other->value.value == op->value.value && !(op->flags & FLAG_IGNORE)) {
1516                                                 /* WAR */
1517                                                 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1518                                                 mark_remove_store(op);
1519                                                 /* do NOT change the memop table */
1520                                                 continue;
1521                                         }
1522                                 }
1523                                 /* KILL all possible aliases */
1524                                 kill_memops(&op->value);
1525                                 /* add this value */
1526                                 add_memop(op);
1527                         }
1528                         break;
1529                 default:
1530                         if (op->flags & FLAG_KILL_ALL)
1531                                 kill_all();
1532                 }
1533         }
1534 }  /* calc_gen_kill_avail */
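
/*
 * A rough source-level sketch of the cases handled above, assuming p is not
 * written through any alias in between:
 *
 *   x = *p;    // Load
 *   y = *p;    // RAR: replaced by x
 *   *p = x;    // WAR: stores the value that is already there, Store removed
 *   *p = 3;    // Store
 *   *p = 4;    // WAW in the same block: the previous Store (*p = 3) is removed
 *   z = *p;    // RAW: replaced by the stored value 4
 */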
1535
1536 #define BYTE_SIZE(x)  (((x) + 7) >> 3)
1537
1538 /**
1539  * Do forward dataflow analysis on a given block to calculate the avail_out set
1540  * for this block only.
1541  *
1542  * @param bl  the block
1543  */
1544 static void forward_avail(block_t *bl) {
1545         /* fill the data from the current block */
1546         env.curr_id_2_memop = bl->id_2_memop_avail;
1547         env.curr_set        = bl->avail_out;
1548
1549         calc_gen_kill_avail(bl);
1550         dump_curr(bl, "Avail_out");
1551 }  /* forward_avail */
1552
1553 /**
1554  * Do backward dataflow analysis on a given block to calculate the antic set
1555  * of Loaded addresses.
1556  *
1557  * @param bl  the block
1558  *
1559  * @return non-zero if the set has changed since last iteration
1560  */
1561 static int backward_antic(block_t *bl) {
1562         memop_t *op;
1563         ir_node *block = bl->block;
1564         int     n = get_Block_n_cfg_outs(block);
1565
1566         if (n == 1) {
1567                 ir_node  *succ    = get_Block_cfg_out(block, 0);
1568                 block_t  *succ_bl = get_block_entry(succ);
1569                 int      pred_pos = get_Block_cfgpred_pos(succ, block);
1570                 unsigned end      = env.rbs_size;
1571                 unsigned pos;
1572
1573                 kill_all();
1574
1575                 if (bl->trans_results == NULL) {
1576                         /* allocate the translate cache */
1577                         unsigned size = env.curr_adr_id * sizeof(bl->trans_results[0]);
1578                         bl->trans_results = obstack_alloc(&env.obst, size);
1579                         memset(bl->trans_results, 0, size);
1580                 }
1581
1582                 /* Phi-translate the values anticipated by the successor */
1583                 for (pos = rbitset_next(succ_bl->anticL_in, 0, 1);
1584                      pos < end;
1585                      pos = rbitset_next(succ_bl->anticL_in, pos + 1, 1)) {
1586                         /*
1587                          * Do Phi-translation here: Note that at this point the nodes are
1588                          * not changed, so we can safely cache the results.
1589                          * However, Loads of Load results ARE bad, because we have no way
1590                          * to translate them yet ...
1591                          */
1592                         memop_t *op = bl->trans_results[pos];
1593                         if (op == NULL) {
1594                                 /* not yet translated */
1595                                 ir_node *adr, *trans_adr;
1596
1597                                 op  = succ_bl->id_2_memop_antic[pos];
1598                                 adr = op->value.address;
1599
1600                                 trans_adr = phi_translate(adr, succ, pred_pos);
1601                                 if (trans_adr != adr) {
1602                                         /* create a new entry for the translated one */
1603                                         memop_t *new_op;
1604
1605                                         new_op = alloc_memop(NULL);
1606                                         new_op->value.address = trans_adr;
1607                                         new_op->value.id      = register_address(trans_adr);
1608                                         new_op->value.mode    = op->value.mode;
1609                                         new_op->node          = op->node; /* we need the node to decide if Load/Store */
1610                                         new_op->flags         = op->flags;
1611
1612                                         bl->trans_results[pos] = new_op;
1613                                         op = new_op;
1614                                 }
1615                         }
1616                         rbitset_set(env.curr_set, op->value.id);
1617                         env.curr_id_2_memop[op->value.id] = op;
1618                 }
1619         } else if (n > 1) {
1620                 ir_node *succ    = get_Block_cfg_out(block, 0);
1621                 block_t *succ_bl = get_block_entry(succ);
1622                 int i;
1623
1624                 rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1625                 memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1626
1627                 /* Hmm: probably we want to kill merges of Loads and Stores here */
1628                 for (i = n - 1; i > 0; --i) {
1629                         ir_node *succ    = get_Block_cfg_out(bl->block, i);
1630                         block_t *succ_bl = get_block_entry(succ);
1631
1632                         rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1633                 }
1634         } else {
1635                 /* block ends with a noreturn call */
1636                 kill_all();
1637         }
1638
1639         dump_curr(bl, "AnticL_out");
1640
1641         for (op = bl->memop_backward; op != NULL; op = op->prev) {
1642                 switch (get_irn_opcode(op->node)) {
1643                 case iro_Phi:
1644                         /* meet */
1645                         break;
1646                 case iro_Sync:
1647                         /* join */
1648                         break;
1649                 case iro_Load:
1650                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1651                                 /* always add it */
1652                                 add_memop(op);
1653                         }
1654                         break;
1655                 case iro_Store:
1656                         if (! (op->flags & FLAG_KILLED_NODE)) {
1657                                 /* a Store: check which memops must be killed */
1658                                 kill_memops(&op->value);
1659                         }
1660                         break;
1661                 default:
1662                         if (op->flags & FLAG_KILL_ALL)
1663                                 kill_all();
1664                 }
1665         }
1666
1667         memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1668         if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1669                 /* changed */
1670                 rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
1671                 dump_curr(bl, "AnticL_in*");
1672                 return 1;
1673         }
1674         dump_curr(bl, "AnticL_in");
1675         return 0;
1676 }  /* backward_antic */
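
/*
 * Phi-translation, sketched: if the single successor anticipates a Load from
 * an address that is a Phi in that successor,
 *
 *   succ:  a = Phi(a0, a1)     // a0 along pred 0, a1 along pred 1
 *          ... = Load(a)
 *
 * then along the edge from pred 0 the anticipated address is a0, not a.
 * backward_antic() therefore rewrites the address through the Phi
 * (phi_translate) and registers an id for the translated address, so the
 * anticipated value is tracked under a0 in this block.
 */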
1677
1678 /**
1679  * Replace a Load memop by an already known value.
1680  *
1681  * @param op  the Load memop
1682  */
1683 static void replace_load(memop_t *op) {
1684         ir_node *load = op->node;
1685         ir_node *def  = skip_Id(op->replace);
1686         ir_node *proj;
1687         ir_mode *mode;
1688
1689         if (def != NULL)
1690                 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1691         else {
1692                 if (op->flags & FLAG_EXCEPTION) {
1693                         /* bad: this node is unused and executed for exception only */
1694                         DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1695                         return;
1696                 }
1697                 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1698         }
1699
1700         if (op->mem != NULL) {
1701                 /* in rare cases a Load might have NO memory */
1702                 exchange(op->mem, get_Load_mem(load));
1703         }
1704         proj = op->projs[pn_Load_res];
1705         if (proj != NULL) {
1706                 mode = get_irn_mode(proj);
1707                 if (get_irn_mode(def) != mode) {
1708                         /* a hidden cast */
1709                         dbg_info *db    = get_irn_dbg_info(load);
1710                         ir_node  *block = get_nodes_block(proj);
1711                         def = new_rd_Conv(db, current_ir_graph, block, def, mode);
1712                 }
1713                 exchange(proj, def);
1714         }
1715         proj = op->projs[pn_Load_X_except];
1716         if (proj != NULL) {
1717                 exchange(proj, new_Bad());
1718         }
1719         proj = op->projs[pn_Load_X_regular];
1720         if (proj != NULL) {
1721                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1722         }
1723 }  /* replace_load */
1724
1725 /**
1726  * Remove a Store memop.
1727  *
1728  * @param op  the Store memop
1729  */
1730 static void remove_store(memop_t *op) {
1731         ir_node *store = op->node;
1732         ir_node *proj;
1733
1734         DB((dbg, LEVEL_1, "Removing %+F\n", store));
1735
1736         if (op->mem != NULL) {
1737                 /* in rare cases a Store might have no memory */
1738                 exchange(op->mem, get_Store_mem(store));
1739         }
1740         proj = op->projs[pn_Store_X_except];
1741         if (proj != NULL) {
1742                 exchange(proj, new_Bad());
1743         }
1744         proj = op->projs[pn_Store_X_regular];
1745         if (proj != NULL) {
1746                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
1747         }
1748 }  /* remove_store */
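
/*
 * In both replace_load() and remove_store() the memory Proj of the killed
 * node is exchanged against the node's memory input, which splices the
 * operation out of the memory chain:
 *
 *   before:  mem0 -> Load/Store -> Proj M -> later memory users
 *   after:   mem0 -------------------------> later memory users
 *
 * The remaining Projs are rerouted separately: the result value to its
 * replacement, X_regular to a plain Jmp and X_except to Bad.
 */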
1749
1750
1751 /**
1752  * Do all necessary replacements for a given block.
1753  *
1754  * @param bl  the block
1755  */
1756 static void do_replacements(block_t *bl) {
1757         memop_t *op;
1758
1759         for (op = bl->memop_forward; op != NULL; op = op->next) {
1760                 if (op->flags & FLAG_KILLED_NODE) {
1761                         switch (get_irn_opcode(op->node)) {
1762                         case iro_Load:
1763                                 replace_load(op);
1764                                 break;
1765                         case iro_Store:
1766                                 remove_store(op);
1767                                 break;
1768                         }
1769                 }
1770         }
1771 }  /* do_replacements */
1772
1773 /**
1774  * Calculate the Avail_out sets for all basic blocks.
1775  */
1776 static void calcAvail(void) {
1777         memop_t  **tmp_memop = env.curr_id_2_memop;
1778         unsigned *tmp_set    = env.curr_set;
1779         block_t  *bl;
1780
1781         /* calculate avail_out */
1782         DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1783
1784         /* iterate over all blocks in any order, skip the start block */
1785         for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1786                 forward_avail(bl);
1787         }
1788
1789         /* restore the current sets */
1790         env.curr_id_2_memop = tmp_memop;
1791         env.curr_set        = tmp_set;
1792 }  /* calcAvail */
1793
1794 /**
1795  * Calculate the Antic_in sets for all basic blocks.
1796  */
1797 static void calcAntic(void) {
1798         int i, need_iter;
1799
1800         /* calculate antic_in */
1801         DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1802         i = 0;
1803         do {
1804                 block_t *bl;
1805
1806                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1807
1808                 need_iter = 0;
1809
1810                 /* over all blocks in reverse post order */
1811                 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1812                         need_iter |= backward_antic(bl);
1813                 }
1814                 ++i;
1815         } while (need_iter);
1816         DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1817 }  /* calcAntic */
1818
1819 /**
1820  * Return the node representing the last memory in a block.
1821  *
1822  * @param bl  the block
1823  */
1824 static ir_node *find_last_memory(block_t *bl) {
1825         for (;;) {
1826                 if (bl->memop_backward != NULL) {
1827                         return bl->memop_backward->mem;
1828                 }
1829                 /* if there is NO memory in this block, go to the dominator */
1830                 bl = get_block_entry(get_Block_idom(bl->block));
1831         }
1832 }  /* find_last_memory */
1833
1834 /**
1835  * Reroute all memory users of old memory
1836  * to a new memory IR-node.
1837  *
1838  * @param omem  the old memory IR-node
1839  * @param nmem  the new memory IR-node
1840  */
1841 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1842         int i;
1843
1844         for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1845                 int     n_pos;
1846                 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1847
1848                 set_irn_n(user, n_pos, nmem);
1849         }
1850
1851         /* all edges that previously pointed to omem now point to nmem */
1852         nmem->out = omem->out;
1853 }  /* reroute_all_mem_users */
1854
1855 /**
1856  * Reroute memory users of old memory that are dominated by a given block
1857  * to a new memory IR-node.
1858  *
1859  * @param omem     the old memory IR-node
1860  * @param nmem     the new memory IR-node
1861  * @param pass_bl  the block the memory must pass
1862  */
1863 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1864         int             i, j, n = get_irn_n_outs(omem);
1865         ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1866
1867         for (i = j = 0; i < n; ++i) {
1868                 int     n_pos;
1869                 ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
1870                 ir_node *use_bl = get_nodes_block(user);
1871
1872
1873                 if (is_Phi(user)) {
1874                         use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1875                 }
1876                 if (block_dominates(pass_bl, use_bl)) {
1877                         /* found a user that is dominated */
1878                         ++j;
1879                         edges[j].pos = n_pos;
1880                         edges[j].use = user;
1881
1882                         set_irn_n(user, n_pos, nmem);
1883                 }
1884         }
1885
1886         /* Modify the out structure: we create a new out edge array on our
1887            temporary obstack here. This should be no problem, as we invalidate the edges
1888            at the end anyway. */
1889         /* first entry is used for the length */
1890         edges[0].pos = j;
1891         nmem->out = edges;
1892 }  /* reroute_mem_through */
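
/*
 * Layout of the out array built above: entry 0 carries only the number of
 * edges in its pos field, the actual def-use edges occupy the slots 1..j.
 * This is why the loop pre-increments j before filling edges[j] and why the
 * array is allocated with n + 1 entries.
 */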
1893
1894 /**
1895  * insert Loads, making partly redundant Loads fully redundant
1896  */
1897 static int insert_Load(block_t *bl) {
1898         ir_node  *block = bl->block;
1899         int      i, n = get_Block_n_cfgpreds(block);
1900         unsigned end = env.rbs_size - 1;
1901         unsigned pos;
1902
1903         DB((dbg, LEVEL_3, "processing %+F\n", block));
1904
1905         if (n == 0) {
1906                 /* might still happen for an unreachable block (the end block, for instance) */
1907                 return 0;
1908         }
1909
1910         if (n > 1) {
1911                 ir_node **ins;
1912
1913                 NEW_ARR_A(ir_node *, ins, n);
1914
1915                 rbitset_set_all(env.curr_set, env.rbs_size);
1916
1917                 /* More than one predecessor: calculate the join over all avail_out sets, ignoring
1918                    unevaluated blocks. These are set to Top anyway. */
1919                 for (i = n - 1; i >= 0; --i) {
1920                         ir_node *pred = skip_Proj(get_Block_cfgpred(block, i));
1921                         ir_node *blk  = get_nodes_block(pred);
1922                         block_t *pred_bl;
1923
1924                         pred_bl = get_block_entry(blk);
1925                         rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
1926
1927                         if (is_Load(pred) || is_Store(pred)) {
1928                                 /* We reached this block by an exception from a Load or Store:
1929                                  * the memop creating the exception was NOT completed then, so kill it.
1930                                  */
1931                                 memop_t *exc_op = get_irn_memop(pred);
1932                                 rbitset_clear(env.curr_set, exc_op->value.id);
1933                         }
1934
1935                 }
1936                 /*
1937                  * Ensure that all values are in the map: build Phi's if necessary:
1938                  * Note: the last bit is the sentinel and ALWAYS set, so start with -2.
1939                  */
1940                 for (pos = env.rbs_size - 2; pos != (unsigned)-1; --pos) {
1941                         if (! rbitset_is_set(env.curr_set, pos))
1942                                 env.curr_id_2_memop[pos] = NULL;
1943                         else {
1944                                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1945                                 block_t *pred_bl = get_block_entry(pred);
1946                                 int     need_phi = 0;
1947                                 memop_t *first   = NULL;
1948                                 ir_mode *mode;
1949
1950                                 for (i = 0; i < n; ++i) {
1951                                         memop_t *mop;
1952
1953                                         pred    = get_Block_cfgpred_block(bl->block, i);
1954                                         pred_bl = get_block_entry(pred);
1955
1956                                         mop = pred_bl->id_2_memop_avail[pos];
1957                                         if (first == NULL) {
1958                                                 first = mop;
1959                                                 ins[0] = first->value.value;
1960                                                 mode = get_irn_mode(ins[0]);
1961
1962                                                 /* no Phi needed so far */
1963                                                 env.curr_id_2_memop[pos] = first;
1964                                         } else {
1965                                                 ins[i] = conv_to(mop->value.value, mode);
1966                                                 if (ins[i] != ins[0]) {
1967                                                         if (ins[i] == NULL) {
1968                                                                 /* conversion failed */
1969                                                                 env.curr_id_2_memop[pos] = NULL;
1970                                                                 rbitset_clear(env.curr_set, pos);
1971                                                                 break;
1972                                                         }
1973                                                         need_phi = 1;
1974                                                 }
1975                                         }
1976                                 }
1977                                 if (need_phi) {
1978                                         /* build a Phi  */
1979                                         ir_node *phi = new_r_Phi(current_ir_graph, bl->block, n, ins, mode);
1980                                         memop_t *phiop = alloc_memop(phi);
1981
1982                                         phiop->value = first->value;
1983                                         phiop->value.value = phi;
1984
1985                                         /* no need to link it in, as it is a DATA phi */
1986
1987                                         env.curr_id_2_memop[pos] = phiop;
1988
1989                                         DB((dbg, LEVEL_3, "Created new %+F on merging value for address %+F\n", phi, first->value.address));
1990                                 }
1991                         }
1992                 }
1993         } else {
1994                 /* only one predecessor, simply copy the map */
1995                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1996                 block_t *pred_bl = get_block_entry(pred);
1997
1998                 rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
1999
2000                 memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
2001         }
2002
2003         if (n > 1) {
2004                 /* check for partly redundant values */
2005                 for (pos = rbitset_next(bl->anticL_in, 0, 1);
2006                      pos < end;
2007                      pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
2008                         memop_t *op = bl->id_2_memop_antic[pos];
2009                         int     have_some, all_same;
2010                         ir_node *first;
2011
2012                         if (rbitset_is_set(env.curr_set, pos)) {
2013                                 /* already avail */
2014                                 continue;
2015                         }
2016
2017                         assert(is_Load(op->node));
2018
2019                         DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
2020
2021                         have_some  = 0;
2022                         all_same   = 1;
2023                         first      = NULL;
2024                         for (i = n - 1; i >= 0; --i) {
2025                                 ir_node *pred    = get_Block_cfgpred_block(block, i);
2026                                 block_t *pred_bl = get_block_entry(pred);
2027                                 memop_t *e       = find_address_avail(pred_bl, &op->value);
2028                                 ir_mode *mode    = op->value.mode;
2029
2030                                 if (e == NULL) {
2031                                         ir_node *ef_block = get_effective_block(op->value.address, block, i);
2032                                         if (! block_dominates(ef_block, pred)) {
2033                                                 /* cannot place a copy here */
2034                                                 have_some = 0;
2035                                                 DB((dbg, LEVEL_3, "%+F cannot be moved into predecessor %+F\n", op->node, pred));
2036                                                 break;
2037                                         }
2038                                         DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
2039                                         pred_bl->avail = NULL;
2040                                         all_same       = 0;
2041                                 } else {
2042                                         if (e->value.mode != mode && !can_convert_to(e->value.mode, mode)) {
2043                                                 /* cannot create a Phi due to different modes */
2044                                                 have_some = 0;
2045                                                 break;
2046                                         }
2047                                         pred_bl->avail = e;
2048                                         have_some      = 1;
2049                                         DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
2050                                         if (first == NULL)
2051                                                 first = e->node;
2052                                         else if (first != e->node)
2053                                                 all_same = 0;
2054                                 }
2055                         }
2056                         if (have_some && !all_same) {
2057                                 ir_mode *mode = op->value.mode;
2058                                 ir_node **in, *phi;
2059                                 memop_t *phi_op;
2060
2061                                 NEW_ARR_A(ir_node *, in, n);
2062
2063                                 for (i = n - 1; i >= 0; --i) {
2064                                         ir_node *pred    = get_Block_cfgpred_block(block, i);
2065                                         block_t *pred_bl = get_block_entry(pred);
2066
2067                                         if (pred_bl->avail == NULL) {
2068                                                 /* create a new Load here to make it fully redundant */
2069                                                 dbg_info *db       = get_irn_dbg_info(op->node);
2070                                                 ir_node  *last_mem = find_last_memory(pred_bl);
2071                                                 ir_node  *load, *def, *adr;
2072                                                 memop_t  *new_op;
2073
2074                                                 assert(last_mem != NULL);
2075                                                 adr  = phi_translate(op->value.address, block, i);
2076                                                 load = new_rd_Load(db, current_ir_graph, pred, last_mem, adr, mode, cons_none);
2077                                                 def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
2078                                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));
2079
2080                                                 new_op                = alloc_memop(load);
2081                                                 new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
2082                                                 new_op->value.address = adr;
2083                                                 new_op->value.id      = op->value.id;
2084                                                 new_op->value.mode    = mode;
2085                                                 new_op->value.value   = def;
2086
2087                                                 new_op->projs[pn_Load_M]   = new_op->mem;
2088                                                 new_op->projs[pn_Load_res] = def;
2089
2090                                                 new_op->prev = pred_bl->memop_backward;
2091                                                 if (pred_bl->memop_backward != NULL)
2092                                                         pred_bl->memop_backward->next = new_op;
2093
2094                                                 pred_bl->memop_backward = new_op;
2095
2096                                                 if (pred_bl->memop_forward == NULL)
2097                                                         pred_bl->memop_forward = new_op;
2098
2099                                                 if (get_nodes_block(last_mem) == pred) {
2100                                                         /* We have added a new last memory op in the pred block.
2101                                                            If pred already had a last mem, reroute all memory
2102                                                            users. */
2103                                                         reroute_all_mem_users(last_mem, new_op->mem);
2104                                                 } else {
2105                                                         /* reroute only the memory edges going through the pred block */
2106                                                         reroute_mem_through(last_mem, new_op->mem, pred);
2107                                                 }
2108
2109                                                 /* we added this load at the end, so it will be avail anyway */
2110                                                 add_memop_avail(pred_bl, new_op);
2111                                                 pred_bl->avail = new_op;
2112                                         }
2113                                         in[i] = conv_to(pred_bl->avail->value.value, mode);
2114                                 }
2115                                 phi = new_r_Phi(current_ir_graph, block, n, in, mode);
2116                                 DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));
2117
2118                                 phi_op = clone_memop_phi(op, phi);
2119                                 add_memop(phi_op);
2120                         }
2121                 }
2122         }
2123
2124         /* recalculate avail by gen and kill */
2125         calc_gen_kill_avail(bl);
2126
2127         /* always update the map after gen/kill, as values might have been changed due to RAR/WAR/WAW */
2128         memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2129
2130         if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
2131                 /* the avail set has changed */
2132                 rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
2133                 dump_curr(bl, "Avail_out*");
2134                 return 1;
2135         }
2136         dump_curr(bl, "Avail_out");
2137         return 0;
2138 }  /* insert_Load */
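
/*
 * What insert_Load() achieves, sketched on a diamond CFG:
 *
 *        A                          A
 *       / \                        / \
 *   B: x = *p;   C:    ==>    B: x = *p;   C: x' = *p;    // inserted Load
 *       \ /                        \ /
 *   D: y = *p;                D: t = Phi(x, x'); y = t;
 *
 * The Load in D is anticipated (anticL_in) but only available from B, i.e. it
 * is partly redundant.  Inserting a copy into C and merging the results with
 * a Phi makes it fully redundant; the Load is then replaced by the Phi and
 * finally removed by do_replacements().
 */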
2139
2140 /**
2141  * Insert Loads upwards.
2142  */
2143 static void insert_Loads_upwards(void) {
2144         int i, need_iter;
2145         block_t *bl;
2146
2147         /* recalculate antic_out and insert Loads */
2148         DB((dbg, LEVEL_2, "Inserting Loads\n"));
2149
2150         i = 0;
2151         do {
2152                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
2153
2154                 need_iter = 0;
2155
2156                 /* over all blocks in reverse post order, skip the start block */
2157                 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
2158                         need_iter |= insert_Load(bl);
2159                 }
2160                 ++i;
2161         } while (need_iter);
2162
2163         DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
2164 }  /* insert_Loads_upwards */
2165
2166 /**
2167  * Kill unreachable control flow.
2168  *
2169  * @param irg  the graph to operate on
2170  */
2171 static void kill_unreachable_blocks(ir_graph *irg) {
2172         block_t *bl;
2173         ir_node **ins;
2174         int     changed = 0;
2175
2176         NEW_ARR_A(ir_node *, ins, env.max_cfg_preds);
2177
2178         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2179                 ir_node *block = bl->block;
2180                 int     i, j, k, n;
2181
2182                 assert(get_Block_mark(block));
2183
2184                 n = get_Block_n_cfgpreds(block);
2185
2186                 for (i = j = 0; i < n; ++i) {
2187                         ir_node *pred = get_Block_cfgpred(block, i);
2188                         ir_node *pred_bl;
2189
2190                         if (is_Bad(pred))
2191                                 continue;
2192
2193                         pred_bl = get_nodes_block(skip_Proj(pred));
2194                         if (! get_Block_mark(pred_bl))
2195                                 continue;
2196
2197                         ins[j++] = pred;
2198                 }
2199                 if (j != n) {
2200                         ir_node *phi, *next;
2201
2202                         /* some unreachable blocks detected */
2203                         changed = 1;
2204
2205                         DB((dbg, LEVEL_1, "Killing dead block predecessors on %+F\n", block));
2206
2207                         set_irn_in(block, j, ins);
2208
2209                         /* shorten all Phi nodes */
2210                         for (phi = get_Block_phis(block); phi != NULL; phi = next) {
2211                                 next = get_Phi_next(phi);
2212
2213                                 for (i = k = 0; i < n; ++i) {
2214                                         ir_node *pred = get_Block_cfgpred_block(block, i);
2215
2216                                         if (is_Bad(pred))
2217                                                 continue;
2218
2219                                         if (! get_Block_mark(pred))
2220                                                 continue;
2221
2222                                         ins[k++] = get_Phi_pred(phi, i);
2223                                 }
2224                                 if (k == 1)
2225                                         exchange(phi, ins[0]);
2226                                 else
2227                                         set_irn_in(phi, k, ins);
2228                         }
2229                 }
2230
2231         }
2232
2233         if (changed) {
2234                 /* kick keep alives */
2235                 ir_node *end = get_irg_end(irg);
2236                 int     i, j, n = get_End_n_keepalives(end);
2237
2238                 NEW_ARR_A(ir_node *, ins, n);
2239
2240                 for (i = j = 0; i < n; ++i) {
2241                         ir_node *ka = get_End_keepalive(end, i);
2242                         ir_node *ka_bl;
2243
2244                         if (is_Bad(ka))
2245                                 continue;
2246                         if (is_Block(ka))
2247                                 ka_bl = ka;
2248                         else
2249                                 ka_bl = get_nodes_block(skip_Proj(ka));
2250                         if (get_Block_mark(ka_bl))
2251                                 ins[j++] = ka;
2252                 }
2253                 if (j != n)
2254                         set_End_keepalives(end, j, ins);
2255
2256                 free_irg_outs(irg);
2257
2258                 /* this transformation does NOT invalidate the dominance information */
2259         }
2260 }  /* kill_unreachable_blocks */
2261
2262 int opt_ldst(ir_graph *irg) {
2263         block_t  *bl;
2264         ir_graph *rem = current_ir_graph;
2265
2266         current_ir_graph = irg;
2267
2268         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
2269 //      firm_dbg_set_mask(dbg, -1);
2270
2271         DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
2272
2273         /* we need landing pads */
2274         remove_critical_cf_edges(irg);
2275
2276 //      dump_ir_block_graph(irg, "-XXX");
2277
2278         if (get_opt_alias_analysis()) {
2279                 assure_irg_entity_usage_computed(irg);
2280                 assure_irp_globals_entity_usage_computed();
2281         }
2282
2283         obstack_init(&env.obst);
2284         ir_nodemap_init(&env.adr_map);
2285
2286         env.forward       = NULL;
2287         env.backward      = NULL;
2288         env.curr_adr_id   = 0;
2289         env.n_mem_ops     = 0;
2290         env.max_cfg_preds = 0;
2291         env.changed       = 0;
2292         env.start_bl      = get_irg_start_block(irg);
2293         env.end_bl        = get_irg_end_block(irg);
2294 #ifdef DEBUG_libfirm
2295         env.id_2_address  = NEW_ARR_F(ir_node *, 0);
2296 #endif
2297
2298         assure_irg_outs(irg);
2299
2300         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2301
2302         /* first step: allocate block entries. Note that some blocks might be
2303            unreachable here. Using the normal walk ensures that ALL blocks are initialized. */
2304         irg_walk_graph(irg, prepare_blocks, link_phis, NULL);
2305
2306         /* produce an inverse post-order list for the CFG: this links only reachable
2307            blocks */
2308         irg_out_block_walk(get_irg_start_block(irg), NULL, inverse_post_order, NULL);
2309
2310         if (! get_Block_mark(env.end_bl)) {
2311                 /*
2312                  * The end block is NOT reachable due to endless loops
2313                  * or no_return calls.
2314                  * Place the end block last.
2315                  * env.backward points to the last block in the list for this purpose.
2316                  */
2317                 env.backward->forward_next = get_block_entry(env.end_bl);
2318
2319                 set_Block_mark(env.end_bl, 1);
2320         }
2321
2322         /* KILL unreachable blocks: these disturb the data flow analysis */
2323         kill_unreachable_blocks(irg);
2324
2325         assure_doms(irg);
2326
2327         /* second step: find and sort all memory ops */
2328         walk_memory_irg(irg, collect_memops, NULL, NULL);
2329
2330 #ifdef DEBUG_libfirm
2331         /* check that the backward map is correct */
2332         assert((unsigned)ARR_LEN(env.id_2_address) == env.curr_adr_id);
2333 #endif
2334
2335         if (env.n_mem_ops == 0) {
2336                 /* no memory ops */
2337                 goto end;
2338         }
2339
2340         /* create the backward links. */
2341         env.backward = NULL;
2342         irg_block_walk_graph(irg, NULL, collect_backward, NULL);
2343
2344         /* link the end block in */
2345         bl = get_block_entry(env.end_bl);
2346         bl->backward_next = env.backward;
2347         env.backward      = bl;
2348
2349         /* check that we really start with the start / end block */
2350         assert(env.forward->block  == env.start_bl);
2351         assert(env.backward->block == env.end_bl);
2352
2353         /* create address sets: for now, only the existing addresses are allowed plus one
2354            needed for the sentinel */
2355         env.rbs_size = env.n_mem_ops + 1;
2356
2357         /* create the current set */
2358         env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2359         rbitset_set(env.curr_set, env.rbs_size - 1);
2360         env.curr_id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2361         memset(env.curr_id_2_memop, 0, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2362
2363         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2364                 /* set sentinel bits */
2365                 bl->avail_out  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2366                 rbitset_set(bl->avail_out, env.rbs_size - 1);
2367
2368                 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2369                 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
2370
2371                 bl->anticL_in  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2372                 rbitset_set(bl->anticL_in, env.rbs_size - 1);
2373
2374                 bl->id_2_memop_antic = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2375                 memset(bl->id_2_memop_antic, 0, env.rbs_size * sizeof(bl->id_2_memop_antic[0]));
2376         }
2377
2378 //      dump_block_list(&env);
2379
2380         calcAvail();
2381         calcAntic();
2382
2383         insert_Loads_upwards();
2384
2385         if (env.changed) {
2386                 /* over all blocks in reverse post order */
2387                 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2388                         do_replacements(bl);
2389                 }
2390
2391                 /* not only invalidate but free them. We might have allocated new out arrays
2392                    on our obstack which will be freed anyway. */
2393                 free_irg_outs(irg);
2394                 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
2395         }
2396 end:
2397
2398         ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2399         ir_nodemap_destroy(&env.adr_map);
2400         obstack_free(&env.obst, NULL);
2401
2402 //      dump_ir_block_graph(irg, "-YYY");
2403
2404 #ifdef DEBUG_libfirm
2405         DEL_ARR_F(env.id_2_address);
2406 #endif
2407
2408         current_ir_graph = rem;
2409         return env.changed != 0;
2410 }  /* opt_ldst */