1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Dataflow-driven Load/Store optimizations, using some ideas from
23  *          VanDrunen's LEPRE
24  * @author  Michael Beck
25  * @version $Id$
26  */
27 #include "config.h"
28
29 #include "irnode_t.h"
30 #include "irflag_t.h"
31 #include "array_t.h"
32 #include "ircons.h"
33 #include "irdom.h"
34 #include "irgmod.h"
35 #include "irgwalk.h"
36 #include "irouts.h"
37 #include "irgraph.h"
38 #include "irgopt.h"
39 #include "iropt.h"
40 #include "iroptimize.h"
41 #include "irnodemap.h"
42 #include "raw_bitset.h"
43 #include "debug.h"
44 #include "error.h"
45
46 /* maximum number of output Projs of a Load or Store, used as the size of the projs[] array in memop_t */
47 #define MAX_PROJ (pn_Load_max > pn_Store_max ? pn_Load_max : pn_Store_max)
48
49 /**
50  * Maps an address to a dense ID.
51  */
52 typedef struct address_entry_t {
53         unsigned id;          /**< The ID */
54 } address_entry;
55
56 /**
57  * Memop-flags.
58  */
59 enum memop_flags {
60         FLAG_KILL_LOADS  =  1, /**< KILL all Loads */
61         FLAG_KILL_STORES =  2, /**< KILL all Stores */
62         FLAG_KILLED_NODE =  4, /**< this node was killed */
63         FLAG_EXCEPTION   =  8, /**< this node has exception flow */
64         FLAG_IGNORE      = 16, /**< ignore this node (volatile or other) */
65         /** this memop KILLS all addresses */
66         FLAG_KILL_ALL    = FLAG_KILL_LOADS|FLAG_KILL_STORES
67 };
68
69 /**
70  * A value: This represents a value stored at a given address in
71  * memory. Do not confuse with values from value numbering.
72  */
73 typedef struct value_t value_t;
74 struct value_t {
75         ir_node  *address;    /**< the address of this value */
76         ir_node  *value;      /**< the value itself */
77         ir_mode  *mode;       /**< the mode of the value */
78         unsigned id;          /**< address id */
79 };
80
81 /**
82  * A memop describes a memory-related operation.
83  * These are Loads/Stores and all other ops that might modify
84  * memory (Calls, CopyB) or cause exceptions.
85  */
86 typedef struct memop_t memop_t;
87 struct memop_t {
88         value_t  value;      /**< the value of this memop: only defined for Load/Store */
89         ir_node  *node;      /**< the memory op itself */
90         ir_node  *mem;       /**< the memory FROM this node */
91         ir_node  *replace;   /**< the replacement node if this memop is replaced */
92         memop_t  *next;      /**< links to the next memory op in the block in forward order. */
93         memop_t  *prev;      /**< links to the previous memory op in the block in forward order. */
94         unsigned flags;      /**< memop flags */
95         ir_node  *projs[MAX_PROJ]; /**< Projs of this memory op */
96 };
97
98 enum block_flags {
99         BLK_FLAG_NONE      = 0,  /**< No flags */
100         BLK_FLAG_REACHABLE = 1,  /**< This block can be reached. */
101         BLK_FLAG_EVALUATED = 2,  /**< This block was evaluated once. */
102 };
103
104 /**
105  * Additional data for every basic block.
106  */
107 typedef struct block_t block_t;
108 struct block_t {
109         memop_t  *memop_forward;     /**< topologically sorted list of memory ops in this block */
110         memop_t  *memop_backward;    /**< last memop in the list */
111         unsigned *avail_out;         /**< out-set of available addresses */
112         memop_t  **id_2_memop_avail; /**< maps avail address ids to memops */
113         unsigned *anticL_in;         /**< in-set of anticipated Load addresses */
114         memop_t  **id_2_memop_antic; /**< maps anticipated address ids to memops */
115         ir_node  *block;             /**< the associated block */
116         block_t  *forward_next;      /**< next block entry for forward iteration */
117         block_t  *backward_next;     /**< next block entry for backward iteration */
118         memop_t  *avail;             /**< used locally for the avail map */
119         unsigned flags;              /**< additional flags */
120 };
121
122 /**
123  * Metadata for this pass.
124  */
125 typedef struct ldst_env_t {
126         struct obstack  obst;              /**< obstack for temporary data */
127         ir_nodemap_t    adr_map;           /**< maps addresses to address_entry records (dense IDs) */
128         block_t         *forward;          /**< Inverse post-order list of all blocks Start->End */
129         block_t         *backward;         /**< Inverse post-order list of all blocks End->Start */
130         ir_node         *start_bl;         /**< start block of the current graph */
131         ir_node         *end_bl;           /**< end block of the current graph */
132         unsigned        *curr_set;         /**< current set of addresses */
133         memop_t         **curr_id_2_memop; /**< current map of address ids to memops */
134         unsigned        curr_adr_id;       /**< number for address mapping */
135         unsigned        n_mem_ops;         /**< number of memory operations (Loads/Stores) */
136         unsigned        rbs_size;          /**< size of all bitsets in bits */
137         int             changed;           /**< Flags for changed graph state */
138 } ldst_env;
139
140 /* the one and only environment; used unconditionally below, so it must not be hidden behind DEBUG_libfirm */
141 static ldst_env env;
142
143 #ifdef DEBUG_libfirm
144
145 static firm_dbg_module_t *dbg;
146
147 /**
148  * Dumps the block list.
149  *
150  * @param env  the environment
151  */
152 static void dump_block_list(ldst_env *env) {
153         block_t *entry;
154         memop_t *op;
155         int     i;
156
157         for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
158                 DB((dbg, LEVEL_2, "%+F {", entry->block));
159
160                 i = 0;
161                 for (op = entry->memop_forward; op != NULL; op = op->next) {
162                         if (i == 0)
163                                 DB((dbg, LEVEL_2, "\n\t"));
164                         DB((dbg, LEVEL_2, "%+F", op->node));
165                         if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
166                                 DB((dbg, LEVEL_2, "X"));
167                         else if (op->flags & FLAG_KILL_LOADS)
168                                 DB((dbg, LEVEL_2, "L"));
169                         else if (op->flags & FLAG_KILL_STORES)
170                                 DB((dbg, LEVEL_2, "S"));
171                         DB((dbg, LEVEL_2, ", "));
172
173                         i = (i + 1) & 3;
174                 }
175                 DB((dbg, LEVEL_2, "\n}\n\n"));
176         }
177 }
178
179 /**
180  * Dumps the current set.
181  *
182  * @param bl   current block
183  * @param s    name of the set
184  */
185 static void dump_curr(block_t *bl, const char *s) {
186         unsigned pos = 0;
187         unsigned end = env.n_mem_ops * 2 - 1;
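        /* 'end' is the position of the sentinel bit (see kill_all()), so the
         * rbitset_next() loop below is guaranteed to terminate there */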
188         int      i;
189
190         DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
191         i = 0;
192         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
193                 memop_t *op = env.curr_id_2_memop[pos];
194
195                 if (i == 0) {
196                         DB((dbg, LEVEL_2, "\n\t"));
197                 }
198                 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
199                 i = (i + 1) & 3;
200         }
201         DB((dbg, LEVEL_2, "\n}\n"));
202 }
203
204 #else
205 #define dump_block_list(env)
206 #define dump_curr(bl, s)
207 #endif /* DEBUG_libfirm */
208
209 /** Get the block entry for a block node */
210 static block_t *get_block_entry(const ir_node *block) {
211         assert(is_Block(block));
212
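        /* the block entry was attached to the block's link field by prepare_blocks() */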
213         return get_irn_link(block);
214 }
215
216 /** Get the memop entry for a memory operation node */
217 static memop_t *get_irn_memop(const ir_node *irn) {
218         assert(! is_Block(irn));
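        /* the memop was attached to the node's link field by alloc_memop()/clone_memop_phi() */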
219         return get_irn_link(irn);
220 }
221
222 /**
223  * Walk over the memory edges from definition to users.
224  * This ensures that even operations without a memory output are found.
225  *
226  * @param irn   start node
227  * @param pre   pre walker function
228  * @param post  post walker function
229  * @param ctx   context parameter for the walker functions
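 *
 * Note: this walk uses the out edges of the nodes, so they must have been
 * computed before.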
230  */
231 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
232         int     i;
233         ir_mode *mode;
234
235         mark_irn_visited(irn);
236
237         if (pre)
238                 pre(irn, ctx);
239
240         mode = get_irn_mode(irn);
241         if (mode == mode_M) {
242                 /* every successor uses memory */
243                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
244                         ir_node *succ = get_irn_out(irn, i);
245
246                         if (! irn_visited(succ))
247                                 walk_memory(succ, pre, post, ctx);
248                 }
249         } else if (mode == mode_T) {
250                 /* only some Projs use memory */
251                 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
252                         ir_node *proj = get_irn_out(irn, i);
253
254                         if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
255                                 walk_memory(proj, pre, post, ctx);
256                 }
257         }
258         if (post)
259                 post(irn, ctx);
260 }
261
262 /**
263  * Walks over all memory nodes of a graph.
264  *
265  * @param irg   a graph
266  * @param pre   pre walker function
267  * @param post  post walker function
268  * @param ctx   context parameter for the walker functions
269  */
270 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
271         inc_irg_visited(irg);
272
273         ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
274
275         /*
276          * there are two possible sources for memory: initial_mem and nomem
277          * we ignore nomem as this should NOT change the memory
278          */
279         walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
280
281         ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
282 }
283
284 /**
285  * Block walker: allocate a block entry for every block.
286  */
287 static void prepare_blocks(ir_node *block, void *ctx) {
288         block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
289
290         (void)ctx;
291
292         entry->memop_forward    = NULL;
293         entry->memop_backward   = NULL;
294         entry->avail_out        = NULL;
295         entry->id_2_memop_avail = NULL;
296         entry->anticL_in        = NULL;
297         entry->id_2_memop_antic = NULL;
298         entry->block            = block;
299         entry->forward_next     = NULL;
300         entry->backward_next    = NULL;
301         entry->avail            = NULL;
302         entry->flags            = BLK_FLAG_NONE;
303         set_irn_link(block, entry);
304 }
305
306 /**
307  * Block walker: creates the inverse post-order list for the CFG.
308  */
309 static void inverse_post_order(ir_node *block, void *ctx) {
310         block_t *entry = get_block_entry(block);
311
312         (void)ctx;
313
314         /* create the list in inverse order */
315         entry->forward_next = env.forward;
316         entry->flags       |= BLK_FLAG_REACHABLE;
317         env.forward         = entry;
318
319         /* remember the first visited (last in list) entry, needed for later */
320         if (env.backward == NULL)
321                 env.backward = entry;
322 }
323
324 /**
325  * Block walker: create backward links for the memops of a block.
326  */
327 static void collect_backward(ir_node *block, void *ctx) {
328         block_t *entry = get_block_entry(block);
329         memop_t *last, *op;
330
331         (void)ctx;
332
333         /* ignore unreachable blocks */
334         if (!(entry->flags & BLK_FLAG_REACHABLE))
335                 return;
336
337         /*
338          * Do NOT link in the end block yet. We want it to be
339          * the first in the list. This is NOT guaranteed by the walker
340          * if we have endless loops.
341          */
342         if (block != env.end_bl) {
343                 entry->backward_next = env.backward;
344
345                 /* create the list in inverse order */
346                 env.backward = entry;
347         }
348
349         /* create backward links for all memory ops */
350         last = NULL;
351         for (op = entry->memop_forward; op != NULL; op = op->next) {
352                 op->prev = last;
353                 last     = op;
354         }
355         entry->memop_backward = last;
356 }
357
358 /**
359  * Allocate a memop.
360  *
361  * @param irn  the IR-node representing the memop
362  */
363 static memop_t *alloc_memop(ir_node *irn) {
364         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
365
366         m->value.address = NULL;
367         m->value.value   = NULL;
368         m->value.mode    = NULL;
369
370         m->node          = irn;
371         m->mem           = NULL;
372         m->replace       = NULL;
373         m->next          = NULL;
374         m->flags         = 0;
375
376         memset(m->projs, 0, sizeof(m->projs));
377
378         set_irn_link(irn, m);
379         return m;
380 }
381
382 /**
383  * Create a memop for a Phi-replacement.
384  *
385  * @param op   the memop to clone
386  * @param phi  the Phi-node representing the new value
387  */
388 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
389         memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
390
391         m->value         = op->value;
392         m->value.value   = phi;
393
394         m->node          = phi;
395         m->replace       = NULL;
396         m->next          = NULL;
397         m->flags         = 0;
398
399         set_irn_link(phi, m);
400         return m;
401 }
402
403 /**
404  * Register an address and allocate an ID for it.
405  *
406  * @param adr  the IR-node representing the address
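 *
 * The returned ID is dense and is used as the bit position of this address
 * in the avail/antic bitsets.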
407  */
408 static unsigned register_address(ir_node *adr) {
409         address_entry *entry;
410
411         /* skip Confirms and Casts */
412 restart:
413         if (is_Confirm(adr)) {
414                 adr = get_Confirm_value(adr);
415                 goto restart;
416         }
417         if (is_Cast(adr)) {
418                 adr = get_Cast_op(adr);
419                 goto restart;
420         }
421
422         entry = ir_nodemap_get(&env.adr_map, adr);
423
424         if (entry == NULL) {
425                 /* new address */
426                 entry = obstack_alloc(&env.obst, sizeof(*entry));
427
428                 entry->id = env.curr_adr_id++;
429                 ir_nodemap_insert(&env.adr_map, adr, entry);
430
431                 DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
432         }
433         return entry->id;
434 }
435
436 /**
437  * Return the memory properties of a call node.
438  *
439  * @param call  the call node
440  *
441  * @return a bitset of mtp_property_const and mtp_property_pure
442  */
443 static unsigned get_Call_memory_properties(ir_node *call) {
444         ir_type *call_tp = get_Call_type(call);
445         unsigned prop = get_method_additional_properties(call_tp);
446
447         /* check first the call type */
448         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
449                 /* try the called entity */
450                 ir_node *ptr = get_Call_ptr(call);
451
452                 if (is_Global(ptr)) {
453                         ir_entity *ent = get_Global_entity(ptr);
454
455                         prop = get_entity_additional_properties(ent);
456                 }
457         }
458         return prop & (mtp_property_const|mtp_property_pure);
459 }
460
461 /**
462  * Returns an entity if the address ptr points to a constant one.
463  *
464  * @param ptr  the address
465  *
466  * @return an entity or NULL
467  */
468 static ir_entity *find_constant_entity(ir_node *ptr)
469 {
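        /* walk the address computation (Sel/Add/Sub chains) upwards until the base entity is reached */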
470         for (;;) {
471                 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
472                         return get_SymConst_entity(ptr);
473                 } else if (is_Sel(ptr)) {
474                         ir_entity *ent = get_Sel_entity(ptr);
475                         ir_type   *tp  = get_entity_owner(ent);
476
477                         /* Do not fiddle with polymorphism. */
478                         if (is_Class_type(get_entity_owner(ent)) &&
479                                 ((get_entity_n_overwrites(ent)    != 0) ||
480                                 (get_entity_n_overwrittenby(ent) != 0)   ) )
481                                 return NULL;
482
483                         if (is_Array_type(tp)) {
484                                 /* check bounds */
485                                 int i, n;
486
487                                 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
488                                         ir_node *bound;
489                                         tarval *tlower, *tupper;
490                                         ir_node *index = get_Sel_index(ptr, i);
491                                         tarval *tv     = computed_value(index);
492
493                                         /* check if the index is constant */
494                                         if (tv == tarval_bad)
495                                                 return NULL;
496
497                                         bound  = get_array_lower_bound(tp, i);
498                                         tlower = computed_value(bound);
499                                         bound  = get_array_upper_bound(tp, i);
500                                         tupper = computed_value(bound);
501
502                                         if (tlower == tarval_bad || tupper == tarval_bad)
503                                                 return NULL;
504
505                                         if (tarval_cmp(tv, tlower) & pn_Cmp_Lt)
506                                                 return NULL;
507                                         if (tarval_cmp(tupper, tv) & pn_Cmp_Lt)
508                                                 return NULL;
509
510                                         /* ok, bounds check finished */
511                                 }
512                         }
513
514                         if (variability_constant == get_entity_variability(ent))
515                                 return ent;
516
517                         /* try next */
518                         ptr = get_Sel_ptr(ptr);
519                 } else if (is_Add(ptr)) {
520                         ir_node *l = get_Add_left(ptr);
521                         ir_node *r = get_Add_right(ptr);
522
523                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
524                                 ptr = l;
525                         else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
526                                 ptr = r;
527                         else
528                                 return NULL;
529
530                         /* for now, we support only one addition, reassoc should fold all others */
531                         if (! is_SymConst(ptr) && !is_Sel(ptr))
532                                 return NULL;
533                 } else if (is_Sub(ptr)) {
534                         ir_node *l = get_Sub_left(ptr);
535                         ir_node *r = get_Sub_right(ptr);
536
537                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
538                                 ptr = l;
539                         else
540                                 return NULL;
541                         /* for now, we support only one subtraction, reassoc should fold all others */
542                         if (! is_SymConst(ptr) && !is_Sel(ptr))
543                                 return NULL;
544                 } else
545                         return NULL;
546         }
547 }  /* find_constant_entity */
548
549 /**
550  * Return the array index of Sel node n in dimension dim as a long (the index must be a Const).
551  */
552 static long get_Sel_array_index_long(ir_node *n, int dim) {
553         ir_node *index = get_Sel_index(n, dim);
554         assert(is_Const(index));
555         return get_tarval_long(get_Const_tarval(index));
556 }  /* get_Sel_array_index_long */
557
558 /**
559  * Returns the accessed component graph path for a
560  * node computing an address.
561  *
562  * @param ptr    the node computing the address
563  * @param depth  current depth in steps upward from the root
564  *               of the address
565  */
566 static compound_graph_path *rec_get_accessed_path(ir_node *ptr, int depth) {
567         compound_graph_path *res = NULL;
568         ir_entity           *root, *field, *ent;
569         int                 path_len, pos, idx;
570         tarval              *tv;
571         ir_type             *tp;
572
573         if (is_SymConst(ptr)) {
574                 /* a SymConst. If the depth is 0, this is an access to a global
575                  * entity and we don't need a component path, else we know
576                  * at least its length.
577                  */
578                 assert(get_SymConst_kind(ptr) == symconst_addr_ent);
579                 root = get_SymConst_entity(ptr);
580                 res = (depth == 0) ? NULL : new_compound_graph_path(get_entity_type(root), depth);
581         } else if (is_Sel(ptr)) {
582                 /* it's a Sel, go up until we find the root */
583                 res = rec_get_accessed_path(get_Sel_ptr(ptr), depth+1);
584                 if (res == NULL)
585                         return NULL;
586
587                 /* fill up the step in the path at the current position */
588                 field    = get_Sel_entity(ptr);
589                 path_len = get_compound_graph_path_length(res);
590                 pos      = path_len - depth - 1;
591                 set_compound_graph_path_node(res, pos, field);
592
593                 if (is_Array_type(get_entity_owner(field))) {
594                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
595                         set_compound_graph_path_array_index(res, pos, get_Sel_array_index_long(ptr, 0));
596                 }
597         } else if (is_Add(ptr)) {
598                 ir_node *l    = get_Add_left(ptr);
599                 ir_node *r    = get_Add_right(ptr);
600                 ir_mode *mode = get_irn_mode(ptr);
601                 tarval  *tmp;
602
603                 if (is_Const(r) && get_irn_mode(l) == mode) {
604                         ptr = l;
605                         tv  = get_Const_tarval(r);
606                 } else {
607                         ptr = r;
608                         tv  = get_Const_tarval(l);
609                 }
610 ptr_arith:
611                 mode = get_tarval_mode(tv);
612                 tmp  = tv;
613
614                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
615                 if (is_Sel(ptr)) {
616                         field = get_Sel_entity(ptr);
617                 } else {
618                         field = get_SymConst_entity(ptr);
619                 }
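                /* decompose the constant byte offset into array indices: for every
                 * nested array level, index = offset / element_size and the remainder
                 * is carried into the next inner level */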
620                 idx = 0;
621                 for (ent = field;;) {
622                         unsigned size;
623                         tarval   *sz, *tv_index, *tlower, *tupper;
624                         ir_node  *bound;
625
626                         tp = get_entity_type(ent);
627                         if (! is_Array_type(tp))
628                                 break;
629                         ent = get_array_element_entity(tp);
630                         size = get_type_size_bytes(get_entity_type(ent));
631                         sz   = new_tarval_from_long(size, mode);
632
633                         tv_index = tarval_div(tmp, sz);
634                         tmp      = tarval_mod(tmp, sz);
635
636                         if (tv_index == tarval_bad || tmp == tarval_bad)
637                                 return NULL;
638
639                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
640                         bound  = get_array_lower_bound(tp, 0);
641                         tlower = computed_value(bound);
642                         bound  = get_array_upper_bound(tp, 0);
643                         tupper = computed_value(bound);
644
645                         if (tlower == tarval_bad || tupper == tarval_bad)
646                                 return NULL;
647
648                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
649                                 return NULL;
650                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
651                                 return NULL;
652
653                         /* ok, bounds check finished */
654                         ++idx;
655                 }
656                 if (! tarval_is_null(tmp)) {
657                         /* access to some struct/union member */
658                         return NULL;
659                 }
660
661                 /* should be at least ONE array */
662                 if (idx == 0)
663                         return NULL;
664
665                 res = rec_get_accessed_path(ptr, depth + idx);
666                 if (res == NULL)
667                         return NULL;
668
669                 path_len = get_compound_graph_path_length(res);
670                 pos      = path_len - depth - idx;
671
672                 for (ent = field;;) {
673                         unsigned size;
674                         tarval   *sz, *tv_index;
675                         long     index;
676
677                         tp = get_entity_type(ent);
678                         if (! is_Array_type(tp))
679                                 break;
680                         ent = get_array_element_entity(tp);
681                         set_compound_graph_path_node(res, pos, ent);
682
683                         size = get_type_size_bytes(get_entity_type(ent));
684                         sz   = new_tarval_from_long(size, mode);
685
686                         tv_index = tarval_div(tv, sz);
687                         tv       = tarval_mod(tv, sz);
688
689                         /* worked above, should work again */
690                         assert(tv_index != tarval_bad && tv != tarval_bad);
691
692                         /* bounds already checked above */
693                         index = get_tarval_long(tv_index);
694                         set_compound_graph_path_array_index(res, pos, index);
695                         ++pos;
696                 }
697         } else if (is_Sub(ptr)) {
698                 ir_node *l = get_Sub_left(ptr);
699                 ir_node *r = get_Sub_right(ptr);
700
701                 ptr = l;
702                 tv  = get_Const_tarval(r);
703                 tv  = tarval_neg(tv);
704                 goto ptr_arith;
705         }
706         return res;
707 }  /* rec_get_accessed_path */
708
709 /**
710  * Returns an access path or NULL.  The access path is only
711  * valid if the graph is in phase_high and _no_ address computation is used.
712  */
713 static compound_graph_path *get_accessed_path(ir_node *ptr) {
714         compound_graph_path *gr = rec_get_accessed_path(ptr, 0);
715         return gr;
716 }  /* get_accessed_path */
717
718 typedef struct path_entry {
719         ir_entity         *ent;
720         struct path_entry *next;
721         long              index;
722 } path_entry;
723
724 static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next) {
725         path_entry       entry, *p;
726         ir_entity        *ent, *field;
727         ir_initializer_t *initializer;
728         tarval           *tv;
729         ir_type          *tp;
730         unsigned         n;
731
732         entry.next = next;
733         if (is_SymConst(ptr)) {
734                 /* found the root */
735                 ent         = get_SymConst_entity(ptr);
736                 initializer = get_entity_initializer(ent);
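                /* walk the initializer tree along the collected path entries down to
                 * the accessed value */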
737                 for (p = next; p != NULL;) {
738                         if (initializer->kind != IR_INITIALIZER_COMPOUND)
739                                 return NULL;
740                         n  = get_initializer_compound_n_entries(initializer);
741                         tp = get_entity_type(ent);
742
743                         if (is_Array_type(tp)) {
744                                 ent = get_array_element_entity(tp);
745                                 if (ent != p->ent) {
746                                         /* a missing [0] */
747                                         if (0 >= n)
748                                                 return NULL;
749                                         initializer = get_initializer_compound_value(initializer, 0);
750                                         continue;
751                                 }
752                         }
753                         if (p->index >= (int) n)
754                                 return NULL;
755                         initializer = get_initializer_compound_value(initializer, p->index);
756
757                         ent = p->ent;
758                         p   = p->next;
759                 }
760                 tp = get_entity_type(ent);
761                 while (is_Array_type(tp)) {
762                         ent = get_array_element_entity(tp);
763                         tp = get_entity_type(ent);
764                         /* a missing [0] */
765                         n  = get_initializer_compound_n_entries(initializer);
766                         if (0 >= n)
767                                 return NULL;
768                         initializer = get_initializer_compound_value(initializer, 0);
769                 }
770
771                 switch (initializer->kind) {
772                 case IR_INITIALIZER_CONST:
773                         return get_initializer_const_value(initializer);
774                 case IR_INITIALIZER_TARVAL:
775                 case IR_INITIALIZER_NULL:
776                 default:
777                         return NULL;
778                 }
779         } else if (is_Sel(ptr)) {
780                 entry.ent = field = get_Sel_entity(ptr);
781                 tp = get_entity_owner(field);
782                 if (is_Array_type(tp)) {
783                         assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
784                         entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
785                 } else {
786                         int i, n_members = get_compound_n_members(tp);
787                         for (i = 0; i < n_members; ++i) {
788                                 if (get_compound_member(tp, i) == field)
789                                         break;
790                         }
791                         if (i >= n_members) {
792                                 /* not found: should NOT happen */
793                                 return NULL;
794                         }
795                         entry.index = i;
796                 }
797                 return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
798         } else if (is_Add(ptr)) {
799                 ir_node  *l = get_Add_left(ptr);
800                 ir_node  *r = get_Add_right(ptr);
801                 ir_mode  *mode;
802                 unsigned pos;
803
804                 if (is_Const(r)) {
805                         ptr = l;
806                         tv  = get_Const_tarval(r);
807                 } else {
808                         ptr = r;
809                         tv  = get_Const_tarval(l);
810                 }
811 ptr_arith:
812                 mode = get_tarval_mode(tv);
813
814                 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
815                 if (is_Sel(ptr)) {
816                         field = get_Sel_entity(ptr);
817                 } else {
818                         field = get_SymConst_entity(ptr);
819                 }
820
821                 /* count needed entries */
822                 pos = 0;
823                 for (ent = field;;) {
824                         tp = get_entity_type(ent);
825                         if (! is_Array_type(tp))
826                                 break;
827                         ent = get_array_element_entity(tp);
828                         ++pos;
829                 }
830                 /* should be at least ONE entry */
831                 if (pos == 0)
832                         return NULL;
833
834                 /* allocate the right number of entries */
835                 NEW_ARR_A(path_entry, p, pos);
836
837                 /* fill them up */
838                 pos = 0;
839                 for (ent = field;;) {
840                         unsigned size;
841                         tarval   *sz, *tv_index, *tlower, *tupper;
842                         long     index;
843                         ir_node  *bound;
844
845                         tp = get_entity_type(ent);
846                         if (! is_Array_type(tp))
847                                 break;
848                         ent = get_array_element_entity(tp);
849                         p[pos].ent  = ent;
850                         p[pos].next = &p[pos + 1];
851
852                         size = get_type_size_bytes(get_entity_type(ent));
853                         sz   = new_tarval_from_long(size, mode);
854
855                         tv_index = tarval_div(tv, sz);
856                         tv       = tarval_mod(tv, sz);
857
858                         if (tv_index == tarval_bad || tv == tarval_bad)
859                                 return NULL;
860
861                         assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
862                         bound  = get_array_lower_bound(tp, 0);
863                         tlower = computed_value(bound);
864                         bound  = get_array_upper_bound(tp, 0);
865                         tupper = computed_value(bound);
866
867                         if (tlower == tarval_bad || tupper == tarval_bad)
868                                 return NULL;
869
870                         if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
871                                 return NULL;
872                         if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
873                                 return NULL;
874
875                         /* ok, bounds check finished */
876                         index = get_tarval_long(tv_index);
877                         p[pos].index = index;
878                         ++pos;
879                 }
880                 if (! tarval_is_null(tv)) {
881                         /* hmm, wrong access */
882                         return NULL;
883                 }
884                 p[pos - 1].next = next;
885                 return rec_find_compound_ent_value(ptr, p);
886         } else if (is_Sub(ptr)) {
887                 ir_node *l = get_Sub_left(ptr);
888                 ir_node *r = get_Sub_right(ptr);
889
890                 ptr = l;
891                 tv  = get_Const_tarval(r);
892                 tv  = tarval_neg(tv);
893                 goto ptr_arith;
894         }
895         return NULL;
896 }  /* rec_find_compound_ent_value */
897
898 static ir_node *find_compound_ent_value(ir_node *ptr) {
899         return rec_find_compound_ent_value(ptr, NULL);
900 }  /* find_compound_ent_value */
901
902 /**
903  * Mark a Load memop to be replaced by a definition.
904  *
905  * @param op  the Load memop
906  */
907 static void mark_replace_load(memop_t *op, ir_node *def) {
908         op->replace = def;
909         op->flags |= FLAG_KILLED_NODE;
910         env.changed = 1;
911 }  /* mark_replace_load */
912
913 /**
914  * Mark a Store memop to be removed.
915  *
916  * @param op  the Store memop
917  */
918 static void mark_remove_store(memop_t *op) {
919         op->flags |= FLAG_KILLED_NODE;
920         env.changed = 1;
921 }  /* mark_remove_store */
922
923 /**
924  * Update a memop for a Load.
925  *
926  * @param m  the memop
927  */
928 static void update_Load_memop(memop_t *m) {
929         int       i;
930         ir_node   *load = m->node;
931         ir_node   *ptr;
932         ir_entity *ent;
933
934         if (get_Load_volatility(load) == volatility_is_volatile)
935                 m->flags |= FLAG_IGNORE;
936
937         ptr = get_Load_ptr(load);
938
939         m->value.address = ptr;
940
941         for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
942                 ir_node *proj = get_irn_out(load, i);
943                 long    pn;
944
945                 /* beware of keep edges */
946                 if (is_End(proj))
947                         continue;
948
949                 pn = get_Proj_proj(proj);
950                 m->projs[pn] = proj;
951                 switch (pn) {
952                 case pn_Load_res:
953                         m->value.value = proj;
954                         m->value.mode  = get_irn_mode(proj);
955                         break;
956                 case pn_Load_X_except:
957                         m->flags |= FLAG_EXCEPTION;
958                         break;
959                 case pn_Load_M:
960                         m->mem = proj;
961                         break;
962                 case pn_Load_X_regular:
963                         break;
964                 default:
965                         panic("Unsupported Proj from Load %+F", proj);
966                 }
967         }
968
969         /* check if we can determine the entity that will be loaded */
970         ent = find_constant_entity(ptr);
971
972         if (ent != NULL                                     &&
973                 allocation_static == get_entity_allocation(ent) &&
974                 visibility_external_allocated != get_entity_visibility(ent)) {
975                 /* a static allocation that is not external: there should be NO exception
976                  * when loading even if we cannot replace the load itself. */
977                 ir_node *value = NULL;
978
979                 /* no exception: clear the m fields as they might be checked again later */
980                 if (m->projs[pn_Load_X_except]) {
981                         exchange(m->projs[pn_Load_X_except], new_Bad());
982                         m->projs[pn_Load_X_except] = NULL;
983                         m->flags &= ~FLAG_EXCEPTION;
984                         env.changed = 1;
985                 }
986                 if (m->projs[pn_Load_X_regular]) {
987                         exchange(m->projs[pn_Load_X_regular], new_r_Jmp(current_ir_graph, get_nodes_block(load)));
988                         m->projs[pn_Load_X_regular] = NULL;
989                         env.changed = 1;
990                 }
991
992                 if (variability_constant == get_entity_variability(ent)) {
993                         if (is_atomic_entity(ent)) {
994                                 /* Might not be atomic after lowering of Sels.  In this case we
995                                  * could also load, but it's more complicated. */
996                                 /* simpler case: we load the content of a constant value:
997                                  * replace it by the constant itself */
998                                 value = get_atomic_ent_value(ent);
999                         } else if (ent->has_initializer) {
1000                                 /* new style initializer */
1001                                 value = find_compound_ent_value(ptr);
1002                         } else {
1003                                 /* old style initializer */
1004                                 compound_graph_path *path = get_accessed_path(ptr);
1005
1006                                 if (path != NULL) {
1007                                         assert(is_proper_compound_graph_path(path, get_compound_graph_path_length(path)-1));
1008
1009                                         value = get_compound_ent_value_by_path(ent, path);
1010                                         DB((dbg, LEVEL_1, "  Constant access at %F%F resulted in %+F\n", ent, path, value));
1011                                         free_compound_graph_path(path);
1012                                 }
1013                         }
1014                         if (value != NULL)
1015                                 value = can_replace_load_by_const(load, value);
1016                 }
1017
1018                 if (value != NULL) {
1019                         /* we completely replace the load by this value */
1020                         DB((dbg, LEVEL_1, "Replacing Load %+F by constant %+F\n", m->node, value));
1021                         mark_replace_load(m, value);
1022                         return;
1023                 }
1024         }
1025
1026         if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
1027                 /* only create an address if this node is NOT killed immediately or ignored */
1028                 m->value.id = register_address(ptr);
1029                 ++env.n_mem_ops;
1030         } else {
1031                 /* no user, KILL it */
1032                 m->flags |= FLAG_KILLED_NODE;
1033         }
1034 }
1035
1036 /**
1037  * Update a memop for a Store.
1038  *
1039  * @param m  the memop
1040  */
1041 static void update_Store_memop(memop_t *m) {
1042         int     i;
1043         ir_node *store = m->node;
1044         ir_node *adr   = get_Store_ptr(store);
1045
1046         if (get_Store_volatility(store) == volatility_is_volatile) {
1047                 m->flags |= FLAG_IGNORE;
1048         } else {
1049                 /* only create an address if this node is NOT ignored */
1050                 m->value.id = register_address(adr);
1051                 ++env.n_mem_ops;
1052         }
1053
1054         m->value.address = adr;
1055
1056         for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1057                 ir_node *proj = get_irn_out(store, i);
1058                 long    pn;
1059
1060                 /* beware of keep edges */
1061                 if (is_End(proj))
1062                         continue;
1063
1064                 pn = get_Proj_proj(proj);
1065                 m->projs[pn] = proj;
1066                 switch (pn) {
1067                 case pn_Store_X_except:
1068                         m->flags |= FLAG_EXCEPTION;
1069                         break;
1070                 case pn_Store_M:
1071                         m->mem = proj;
1072                         break;
1073                 case pn_Store_X_regular:
1074                         break;
1075                 default:
1076                         panic("Unsupported Proj from Store %+F", proj);
1077                 }
1078         }
1079         m->value.value = get_Store_value(store);
1080         m->value.mode  = get_irn_mode(m->value.value);
1081 }
1082
1083 /**
1084  * Update a memop for a Call.
1085  *
1086  * @param m  the memop
1087  */
1088 static void update_Call_memop(memop_t *m) {
1089         ir_node  *call = m->node;
1090         unsigned prop  = get_Call_memory_properties(call);
1091         int      i;
1092
1093         if (prop & mtp_property_const) {
1094                 /* A constant call does NOT use memory at all, we
1095                    can kick it from the list. */
1096         } else if (prop & mtp_property_pure) {
1097                 /* pure calls READ memory */
1098                 m->flags = FLAG_KILL_STORES;
1099         } else
1100                 m->flags = FLAG_KILL_ALL;
1101
1102         for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
1103                 ir_node *proj = get_irn_out(call, i);
1104
1105                 /* beware of keep edges */
1106                 if (is_End(proj))
1107                         continue;
1108
1109                 switch (get_Proj_proj(proj)) {
1110                 case pn_Call_X_except:
1111                         m->flags |= FLAG_EXCEPTION;
1112                         break;
1113                 case pn_Call_M_regular:
1114                         m->mem = proj;
1115                         break;
1116                 }
1117         }
1118 }
1119
1120 /**
1121  * Update a memop for a Div/Mod/Quot/DivMod.
1122  *
1123  * @param m  the memop
1124  */
1125 static void update_DivOp_memop(memop_t *m) {
1126         ir_node *div = m->node;
1127         int     i;
1128
1129         for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
1130                 ir_node *proj = get_irn_out(div, i);
1131
1132                 /* beware of keep edges */
1133                 if (is_End(proj))
1134                         continue;
1135
1136                 switch (get_Proj_proj(proj)) {
1137                 case pn_Generic_X_except:
1138                         m->flags |= FLAG_EXCEPTION;
1139                         break;
1140                 case pn_Generic_M_regular:
1141                         m->mem = proj;
1142                         break;
1143                 }
1144         }
1145 }
1146
1147 /**
1148  * Update a memop for a Phi.
1149  *
1150  * @param m  the memop
1151  */
1152 static void update_Phi_memop(memop_t *m) {
1153         /* the Phi is its own mem */
1154         m->mem = m->node;
1155 }
1156
1157 /**
1158  * Memory walker: collect all memory ops and build topological lists.
1159  */
1160 static void collect_memops(ir_node *irn, void *ctx) {
1161         memop_t  *op;
1162         ir_node  *block;
1163         block_t  *entry;
1164
1165         (void) ctx;
1166         if (is_Proj(irn)) {
1167                 /* we can safely ignore ProjM's except the initial memory */
1168                 if (irn != get_irg_initial_mem(current_ir_graph))
1169                         return;
1170         }
1171
1172         op    = alloc_memop(irn);
1173         block = get_nodes_block(irn);
1174         entry = get_block_entry(block);
1175
1176         if (is_Phi(irn)) {
1177                 update_Phi_memop(op);
1178                 /* Phis must always be placed first */
1179                 op->next = entry->memop_forward;
1180                 entry->memop_forward = op;
1181                 if (entry->memop_backward == NULL)
1182                         entry->memop_backward = op;
1183         } else {
1184                 switch (get_irn_opcode(irn)) {
1185                 case iro_Load:
1186                         update_Load_memop(op);
1187                         break;
1188                 case iro_Store:
1189                         update_Store_memop(op);
1190                         break;
1191                 case iro_Call:
1192                         update_Call_memop(op);
1193                         break;
1194                 case iro_Sync:
1195                 case iro_Pin:
1196                         op->mem = irn;
1197                         break;
1198                 case iro_Proj:
1199                         /* initial memory */
1200                         op->mem = irn;
1201                         break;
1202                 case iro_Return:
1203                 case iro_End:
1204                         /* we only need those to find the memory edge */
1205                         break;
1206                 case iro_Div:
1207                 case iro_DivMod:
1208                 case iro_Quot:
1209                 case iro_Mod:
1210                         update_DivOp_memop(op);
1211                         break;
1212
1213                 case iro_Builtin:
1214                         /* TODO: handle some builtins */
1215                 default:
1216                         /* unsupported operation */
1217                         op->flags = FLAG_KILL_ALL;
1218                 }
1219
1220
1221                 /* all others should be placed last */
1222                 if (entry->memop_backward == NULL) {
1223                         entry->memop_forward = entry->memop_backward = op;
1224                 } else {
1225                         entry->memop_backward->next = op;
1226                         entry->memop_backward       = op;
1227                 }
1228         }
1229 }
1230
1231 /**
1232  * Find an address in the current set.
1233  *
1234  * @param value  the value to be searched for
1235  */
1236 static memop_t *find_address(const value_t *value) {
1237         if (rbitset_is_set(env.curr_set, value->id)) {
1238                 memop_t *res = env.curr_id_2_memop[value->id];
1239
1240                 if (res->value.mode == value->mode)
1241                         return res;
1242                 /* allow hidden casts */
1243                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1244                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1245                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1246                         return res;
1247         }
1248         return NULL;
1249 }
1250
1251 /**
1252  * Find an address in the avail_out set.
1253  *
1254  * @param bl     the block
1255  * @param value  the value to be searched for
1256  */
1257 static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
1258         if (rbitset_is_set(bl->avail_out, value->id)) {
1259                 memop_t *res = bl->id_2_memop_avail[value->id];
1260
1261                 if (res->value.mode == value->mode)
1262                         return res;
1263                 /* allow hidden casts */
1264                 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1265                     get_mode_arithmetic(value->mode) == irma_twos_complement &&
1266                     get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1267                         return res;
1268         }
1269         return NULL;
1270 }
1271
1272 /**
1273  * Kill all Loads from the current set.
1274  */
1275 static void kill_all_loads(void) {
1276         unsigned pos = 0;
1277         unsigned end = env.n_mem_ops * 2 - 1;
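        /* 'end' is the sentinel bit position set by kill_all(), so rbitset_next()
         * stops there at the latest */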
1278
1279         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1280                 memop_t *op = env.curr_id_2_memop[pos];
1281
1282                 if (! is_Store(op->node))
1283                         rbitset_clear(env.curr_set, pos);
1284         }
1285 }
1286
1287 /**
1288  * Kill all Stores from the current set.
1289  */
1290 static void kill_all_stores(void) {
1291         unsigned pos = 0;
1292         unsigned end = env.n_mem_ops * 2 - 1;
1293
1294         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1295                 memop_t *op = env.curr_id_2_memop[pos];
1296
1297                 if (is_Store(op->node))
1298                         rbitset_clear(env.curr_set, pos);
1299         }
1300 }
1301
1302 /**
1303  * Kill all addresses from the current set.
1304  */
1305 static void kill_all(void) {
1306         rbitset_clear_all(env.curr_set, env.rbs_size);
1307
1308         /* set a sentinel bit at the last position so rbitset_next() loops always terminate */
1309         rbitset_set(env.curr_set, env.rbs_size - 1);
1310 }
1311
1312
1313 /**
1314  * Kill all Stores from the current set that are not alias free with respect to a Load value.
1315  *
1316  * @param value  the Load value
1317  */
1318 static void kill_stores(const value_t *value) {
1319         unsigned pos = 0;
1320         unsigned end = env.n_mem_ops * 2 - 1;
1321
1322         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1323                 memop_t *op = env.curr_id_2_memop[pos];
1324
1325                 if (is_Store(op->node)) {
1326                         if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1327                                                               op->value.address, op->value.mode)) {
1328                                 rbitset_clear(env.curr_set, pos);
1329                                 env.curr_id_2_memop[pos] = NULL;
1330                         }
1331                 }
1332         }
1333 }
1334
1335 /**
1336  * Kill all memops from the current set that are not alias free with respect to a Store value.
1337  *
1338  * @param value  the Store value
1339  */
1340 static void kill_memops(const value_t *value) {
1341         unsigned pos = 0;
1342         unsigned end = env.n_mem_ops * 2 - 1;
1343
1344         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1345                 memop_t *op = env.curr_id_2_memop[pos];
1346
1347                 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1348                                                           op->value.address, op->value.mode)) {
1349                         rbitset_clear(env.curr_set, pos);
1350                         env.curr_id_2_memop[pos] = NULL;
1351                 }
1352         }
1353 }
1354
1355 /**
1356  * Add the value of a memop to the current set.
1357  *
1358  * @param op  the memory op
1359  */
1360 static void add_memop(memop_t *op) {
1361         rbitset_set(env.curr_set, op->value.id);
1362         env.curr_id_2_memop[op->value.id] = op;
1363 }
1364
1365 /**
1366  * Add the value of a memop to the avail_out set.
1367  *
1368  * @param bl  the block
1369  * @param op  the memory op
1370  */
1371 static void add_memop_avail(block_t *bl, memop_t *op) {
1372         rbitset_set(bl->avail_out, op->value.id);
1373         bl->id_2_memop_avail[op->value.id] = op;
1374 }
1375
1376 /**
1377  * Update the value of a memop in the avail_out set.
1378  *
1379  * @param bl  the block
1380  * @param op  the memory op
1381  */
1382 static void update_memop_avail(block_t *bl, memop_t *op) {
1383         if (rbitset_is_set(bl->avail_out, op->value.id))
1384                 bl->id_2_memop_avail[op->value.id] = op;
1385 }
1386
1387 /**
1388  * Check if we can convert a value of one mode to another mode
1389  * without changing the representation of bits.
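 * For example, 32-bit mode_Is and mode_Iu qualify (same size, both two's
 * complement), while converting to a mode of a different size does not.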
1390  */
1391 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
1392         if (get_mode_arithmetic(from) == irma_twos_complement &&
1393             get_mode_arithmetic(to) == irma_twos_complement &&
1394             get_mode_size_bits(from) == get_mode_size_bits(to))
1395                 return 1;
1396         return 0;
1397 }
1398
1399 /**
1400  * Add a Conv if needed.
1401  */
1402 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
1403         ir_mode *other = get_irn_mode(irn);
1404         if (other != mode) {
1405                 /* different modes: check if conversion is possible without changing the bits */
1406                 if (can_convert_to(other, mode)) {
1407                         ir_node *block = get_nodes_block(irn);
1408                         return new_r_Conv(current_ir_graph, block, irn, mode);
1409                 }
1410                 /* otherwise not possible ... yet */
1411                 return NULL;
1412         }
1413         return irn;
1414 }
1415
1416 /**
1417  * Do forward dataflow analysis on the given block and calculate the
1418  * GEN and KILL in the current (avail) set.
1419  *
1420  * @param bl  the block
1421  */
1422 static void calc_gen_kill_avail(block_t *bl) {
1423         memop_t *op;
1424         ir_node *def;
1425
1426         for (op = bl->memop_forward; op != NULL; op = op->next) {
1427                 switch (get_irn_opcode(op->node)) {
1428                 case iro_Phi:
1429                         /* meet */
1430                         break;
1431                 case iro_Sync:
1432                         /* join */
1433                         break;
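                /* A Load: if a value for this address is already available
                   (RAW from a Store, RAR from another Load), the Load can be
                   replaced by that value. */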
1434                 case iro_Load:
1435                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1436                                 /* do we have this already? */
1437                                 memop_t *other = find_address(&op->value);
1438                                 if (other != NULL && other != op) {
1439                                         def = conv_to(other->value.value, op->value.mode);
1440                                         if (def != NULL) {
1441 #ifdef DEBUG_libfirm
1442                                                 if (is_Store(other->node)) {
1443                                                         /* RAW */
1444                                                         DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
1445                                                 } else {
1446                                                         /* RAR */
1447                                                         DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
1448                                                 }
1449 #endif
1450                                                 mark_replace_load(op, def);
1451                                         } else {
1452                                                 /* conversion not possible: overwrite the entry with this Load */
1453                                                 add_memop(op);
1454                                         }
1455                                 } else {
1456                                         /* add this value */
1457                                         kill_stores(&op->value);
1458                                         add_memop(op);
1459                                 }
1460                         }
1461                         break;
1462                 case iro_Store:
1463                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1464                                 /* is there already a memop for this address? */
1465                                 memop_t *other = find_address(&op->value);
1466                                 if (other != NULL) {
1467                                         if (is_Store(other->node)) {
1468                                                 if (op != other && get_nodes_block(other->node) == get_nodes_block(op->node)) {
1469                                                         /*
1470                                                          * A WAW in the same block: we can kill the first Store.
1471                                                          * This is a shortcut: we know that the second Store will be
1472                                                          * anticipated then in any case.
1473                                                          */
1474                                                         DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", op->node, other->node));
1475                                                         mark_remove_store(other);
1476                                                         /* FIXME: a Load might get freed due to this killed Store */
1477                                                 }
1478                                         } else if (other->value.value == op->value.value) {
1479                                                 /* WAR */
1480                                                 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1481                                                 mark_remove_store(op);
1482                                         } else {
1483                                                 /* we overwrite the value that was loaded */
1484                                                 add_memop(op);
1485                                         }
1486                                 } else {
1487                                         /* add this value */
1488                                         kill_memops(&op->value);
1489                                         add_memop(op);
1490                                 }
1491                         }
1492                         break;
1493                 default:
1494                         switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
1495                         case FLAG_KILL_LOADS|FLAG_KILL_STORES:
1496                                 kill_all();
1497                                 break;
1498                         case FLAG_KILL_LOADS:
1499                                 kill_all_loads();
1500                                 break;
1501                         case FLAG_KILL_STORES:
1502                                 kill_all_stores();
1503                                 break;
1504                         case 0:
1505                                 break;
1506                         }
1507                 }
1508         }
1509 }
1510
1511 #define BYTE_SIZE(x)  (((x) + 7) >> 3)
1512
1513 /**
1514  * Do forward dataflow analysis on a given block to calculate the avail_out set
1515  * for this block only.
1516  *
1517  * @param bl  the block
1518  */
1519 static void forward_avail(block_t *bl) {
1520         /* fill the data from the current block */
1521         env.curr_id_2_memop = bl->id_2_memop_avail;
1522         env.curr_set        = bl->avail_out;
1523
1524         calc_gen_kill_avail(bl);
1525         dump_curr(bl, "Avail_out");
1526 }
1527
1528 /**
1529  * Do backward dataflow analysis on a given block to calculate the set of
1530  * anticipated Loads (AnticL_in).
1531  *
1532  * @param bl  the block
1533  *
1534  * @return non-zero if the set has changed since last iteration
1535  */
1536 static int backward_antic(block_t *bl) {
1537         memop_t *op;
1538         int     n = get_Block_n_cfg_outs(bl->block);
1539
1540         if (n >= 1) {
1541                 ir_node *succ    = get_Block_cfg_out(bl->block, 0);
1542                 block_t *succ_bl = get_block_entry(succ);
1543                 int i;
1544
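                /* AnticL_out(bl) is the intersection of AnticL_in over all
                   successors: start with the first successor, then AND in the
                   remaining ones. */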
1545                 rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1546                 memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1547
1548                 for (i = n - 1; i > 0; --i) {
1549                         ir_node *succ    = get_Block_cfg_out(bl->block, i);
1550                         block_t *succ_bl = get_block_entry(succ);
1551
1552                         rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1553                 }
1554         } else {
1555                 /* block ends with a noreturn call */
1556                 kill_all();
1557         }
1558
1559 #if 0
1560         /* cleanup: kill those Loads which address is not available */
1561         for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1562                 memop_t *op     = succ_bl->id_2_memop[pos];
1563                 ir_node *ptr    = get_Load_ptr(op->node);
1564                 ir_node *ptr_bl = get_nodes_block(ptr);
1565
1566                 if (!block_dominates(ptr_bl, bl->block))
1567                         rbitset_clear(env.curr_set, pos);
1568         }
1569 #endif
1570
1571         dump_curr(bl, "AnticL_out");
1572
1573         for (op = bl->memop_backward; op != NULL; op = op->prev) {
1574                 switch (get_irn_opcode(op->node)) {
1575                 case iro_Phi:
1576                         /* meet */
1577                         break;
1578                 case iro_Sync:
1579                         /* join */
1580                         break;
1581                 case iro_Load:
1582                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1583                                 /* always add it */
1584                                 add_memop(op);
1585                         }
1586                         break;
1587                 case iro_Store:
1588                         if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1589                                 /* a Store: check which memops must be killed */
1590                                 kill_memops(&op->value);
1591                         }
1592                         break;
1593                 default:
1594                         switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
1595                         case FLAG_KILL_LOADS|FLAG_KILL_STORES:
1596                                 kill_all();
1597                                 break;
1598                         case FLAG_KILL_LOADS:
1599                                 kill_all_loads();
1600                                 break;
1601                         case FLAG_KILL_STORES:
1602                                 /*kill_all_stores();*/
1603                                 break;
1604                         case 0:
1605                                 break;
1606                         }
1607                 }
1608         }
1609
1610         memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1611         if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1612                 /* changed */
1613                 rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
1614                 dump_curr(bl, "AnticL_in*");
1615                 return 1;
1616         }
1617         dump_curr(bl, "AnticL_in");
1618         return 0;
1619 }
1620
1621 /**
1622  * Replace a Load memop by an already known value.
1623  *
1624  * @param op  the Load memop
1625  */
1626 static void replace_load(memop_t *op) {
1627         ir_node *load = op->node;
1628         ir_node *def  = skip_Id(op->replace);
1629         ir_node *proj;
1630         ir_mode *mode;
1631
1632         if (def != NULL)
1633                 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1634         else {
1635                 if (op->flags & FLAG_EXCEPTION) {
1636                         /* bad: this node is unused and executed for exception only */
1637                         DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1638                         return;
1639                 }
1640                 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1641         }
1642
1643         if (op->mem != NULL) {
1644                 /* in rare cases a Load might have NO memory Proj */
1645                 exchange(op->mem, get_Load_mem(load));
1646         }
1647         proj = op->projs[pn_Load_res];
1648         if (proj != NULL) {
1649                 mode = get_irn_mode(proj);
1650                 if (get_irn_mode(def) != mode) {
1651                         /* a hidden cast */
1652                         dbg_info *db    = get_irn_dbg_info(load);
1653                         ir_node  *block = get_nodes_block(proj);
1654                         def = new_rd_Conv(db, current_ir_graph, block, def, mode);
1655                 }
1656                 exchange(proj, def);
1657         }
1658         proj = op->projs[pn_Load_X_except];
1659         if (proj != NULL) {
1660                 exchange(proj, new_Bad());
1661         }
1662         proj = op->projs[pn_Load_X_regular];
1663         if (proj != NULL) {
1664                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1665         }
1666 }
1667
1668 /**
1669  * Remove a Store memop.
1670  *
1671  * @param op  the Store memop
1672  */
1673 static void remove_store(memop_t *op) {
1674         ir_node *store = op->node;
1675         ir_node *proj;
1676
1677         DB((dbg, LEVEL_1, "Removing %+F\n", store));
1678
1679         if (op->mem != NULL) {
1680                 /* in rare cases a Store might have no memory Proj */
1681                 exchange(op->mem, get_Store_mem(store));
1682         }
1683         proj = op->projs[pn_Store_X_except];
1684         if (proj != NULL) {
1685                 exchange(proj, new_Bad());
1686         }
1687         proj = op->projs[pn_Store_X_regular];
1688         if (proj != NULL) {
1689                 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
1690         }
1691 }
1692
1693
1694 /**
1695  * Do all necessary replacements for a given block.
1696  *
1697  * @param bl  the block
1698  */
1699 static void do_replacements(block_t *bl) {
1700         memop_t *op;
1701
1702         for (op = bl->memop_forward; op != NULL; op = op->next) {
1703                 if (op->flags & FLAG_KILLED_NODE) {
1704                         switch (get_irn_opcode(op->node)) {
1705                         case iro_Load:
1706                                 replace_load(op);
1707                                 break;
1708                         case iro_Store:
1709                                 remove_store(op);
1710                                 break;
1711                         }
1712                 }
1713         }
1714 }
1715
1716 /**
1717  * Calculate the Avail_out sets for all basic blocks.
1718  */
1719 static void calcAvail(void) {
1720         memop_t  **tmp_memop = env.curr_id_2_memop;
1721         unsigned *tmp_set    = env.curr_set;
1722         block_t  *bl;
1723
1724         /* calculate avail_out */
1725         DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1726
1727         /* iterate over all blocks in any order, skipping the start block */
1728         for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1729                 forward_avail(bl);
1730         }
1731
1732         /* restore the current sets */
1733         env.curr_id_2_memop = tmp_memop;
1734         env.curr_set        = tmp_set;
1735 }
1736
1737 /**
1738  * Calculate the Antic_in sets for all basic blocks.
1739  */
1740 static void calcAntic(void) {
1741         int i, need_iter;
1742
1743         /* calculate antic_in */
1744         DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1745         i = 0;
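        /* fixpoint iteration: repeat until no AnticL_in set changes any more */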
1746         do {
1747                 block_t *bl;
1748
1749                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1750
1751                 need_iter = 0;
1752
1753                 /* over all blocks in reverse post order */
1754                 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1755                         need_iter |= backward_antic(bl);
1756                 }
1757                 ++i;
1758         } while (need_iter);
1759         DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1760 }
1761
1762 /**
1763  * Return the node representing the first memory op in a block.
1764  *
1765  * @param bl  the block
1766  */
1767 static ir_node *find_first_memory(block_t *bl) {
1768         for (;;) {
1769                 if (bl->memop_forward != NULL) {
1770                         return bl->memop_forward->node;
1771                 }
1772                 /* if there is NO memory in this block, go to the post dominator */
1773                 bl = get_block_entry(get_Block_ipostdom(bl->block));
1774         }
1775 }
1776
1777 /**
1778  * Return the node representing the last memory in a block.
1779  *
1780  * @param bl  the block
1781  */
1782 static ir_node *find_last_memory(block_t *bl) {
1783         for (;;) {
1784                 if (bl->memop_backward != NULL) {
1785                         return bl->memop_backward->mem;
1786                 }
1787                 /* if there is NO memory in this block, go to the dominator */
1788                 bl = get_block_entry(get_Block_idom(bl->block));
1789         }
1790 }
1791
1792 /**
1793  * Reroute all memory users of old memory
1794  * to a new memory IR-node.
1795  *
1796  * @param omem  the old memory IR-node
1797  * @param nmem  the new memory IR-node
1798  */
1799 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1800         int i;
1801
1802         for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1803                 int     n_pos;
1804                 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1805
1806                 set_irn_n(user, n_pos, nmem);
1807         }
1808
1809         /* all edges previously point to omem now point to nmem */
1810         nmem->out = omem->out;
1811 }
1812
1813 /**
1814  * Reroute memory users of old memory that are dominated by a given block
1815  * to a new memory IR-node.
1816  *
1817  * @param omem     the old memory IR-node
1818  * @param nmem     the new memory IR-node
1819  * @param pass_bl  the block the memory must pass
1820  */
1821 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1822         int             i, j, n = get_irn_n_outs(omem);
1823         ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1824
1825         for (i = j = 0; i < n; ++i) {
1826                 int     n_pos;
1827                 ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
1828                 ir_node *use_bl = get_nodes_block(user);
1829
1830
1831                 if (is_Phi(user)) {
1832                         use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1833                 }
1834                 if (block_dominates(pass_bl, use_bl)) {
1835                         /* found a user that is dominated */
1836                         ++j;
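                        /* index 0 of the edges array is reserved for the edge count,
                           so the first rerouted user is stored at edges[1] */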
1837                         edges[j].pos = n_pos;
1838                         edges[j].use = user;
1839
1840                         set_irn_n(user, n_pos, nmem);
1841                 }
1842         }
1843
1844         /* Modify the out structure: we create a new out edge array on our
1845            temporary obstack here. This should be no problem, as the edges are
1846            invalidated at the end anyway. */
1847         /* first entry is used for the length */
1848         edges[0].pos = j;
1849         nmem->out = edges;
1850 }
1851
1852 /**
1853  * insert Loads, making partly redundant Loads fully redundant
1854  */
1855 static int insert_Load(block_t *bl) {
1856         ir_node  *block = bl->block;
1857         int      i, n = get_Block_n_cfgpreds(block);
1858         unsigned pos = 0;
1859         unsigned end = env.rbs_size - 1;
1860         int      res = 0;
1861         ir_node  *pred    = get_Block_cfgpred_block(bl->block, 0);
1862         block_t  *pred_bl = get_block_entry(pred);
1863
1864         DB((dbg, LEVEL_3, "processing %+F\n", block));
1865
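        /* start with the avail_out of the first predecessor; the avail_outs of
           the remaining predecessors are intersected in below */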
1866         rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
1867
1868         if (n > 1) {
1869                 int     i, pos;
1870                 ir_node **ins;
1871
1872                 NEW_ARR_A(ir_node *, ins, n);
1873
1874                 /* more than one predecessor: calculate the join of all avail_outs */
1875                 for (i = n - 1; i > 0; --i) {
1876                         ir_node *pred    = skip_Proj(get_Block_cfgpred(block, i));
1877                         block_t *pred_bl = get_block_entry(get_nodes_block(pred));
1878
1879                         rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
1880
1881                         if (is_Load(pred) || is_Store(pred)) {
1882                                 /* We reached this block by an exception from a Load or Store:
1883                                  * the memop creating the exception was NOT completed then, so kill its value
1884                                  */
1885                                 memop_t *exc_op = get_irn_memop(pred);
1886                                 rbitset_clear(env.curr_set, exc_op->value.id);
1887                         }
1888
1889                 }
1890                 /*
1891                  * Ensure that all values are in the map: build Phi's if necessary:
1892                  * Note: the last bit is the sentinel and ALWAYS set, so start at env.rbs_size - 2.
1893                  */
1894                 for (pos = env.rbs_size - 2; pos >= 0; --pos) {
1895                         if (! rbitset_is_set(env.curr_set, pos))
1896                                 env.curr_id_2_memop[pos] = NULL;
1897                         else {
1898                                 ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
1899                                 block_t *pred_bl = get_block_entry(pred);
1900                                 int     need_phi = 0;
1901                                 ir_mode *mode;
1902                                 memop_t *first;
1903
1904                                 first  = pred_bl->id_2_memop_avail[pos];
1905                                 ins[0] = first->value.value;
1906                                 mode = get_irn_mode(ins[0]);
1907
1908                                 for (i = 1; i < n; ++i) {
1909                                         pred    = get_Block_cfgpred_block(bl->block, i);
1910                                         pred_bl = get_block_entry(pred);
1911
1912                                         ins[i] = conv_to(pred_bl->id_2_memop_avail[pos]->value.value, mode);
1913                                         if (ins[i] != ins[0]) {
1914                                                 need_phi = 1;
1915                                                 if (ins[i] == NULL) {
1916                                                         /* conversion failed */
1917                                                         need_phi = 2;
1918                                                         break;
1919                                                 }
1920                                         }
1921
1922                                 }
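                                /* need_phi: 0 = all predecessors deliver the same value,
                                   1 = the values differ, build a Phi,
                                   2 = a mode conversion failed, drop the entry */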
1923                                 switch (need_phi) {
1924                                 case 0:
1925                                         /* no Phi needed */
1926                                         env.curr_id_2_memop[pos] = first;
1927                                         break;
1928                                 case 1: {
1929                                         /* build a Phi  */
1930                                         ir_node *phi = new_r_Phi(current_ir_graph, bl->block, n, ins, mode);
1931                                         memop_t *phiop = alloc_memop(phi);
1932
1933                                         phiop->value = first->value;
1934                                         phiop->value.value = phi;
1935
1936                                         /* no need to link it in, as it is a DATA phi */
1937
1938                                         env.curr_id_2_memop[pos] = phiop;
1939
1940                                         DB((dbg, LEVEL_3, "Created new %+F on merging value for address %+F\n", phi, first->value.address));
1941                                         break;
1942                                 }
1943                                 default:
1944                                         /* not possible because of different modes, delete the entry */
1945                                         rbitset_clear(env.curr_set, pos);
1946                                         break;
1947                                 }
1948                         }
1949                 }
1950         } else {
1951                 /* only one predecessor, simply copy the map */
1952                 memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
1953         }
1954
1955
1956         /* recalculate avail by gen and kill */
1957         calc_gen_kill_avail(bl);
1958
1959         if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
1960                 /* the avail set has changed */
1961                 rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
1962                 memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1963                 dump_curr(bl, "Avail_out*");
1964                 res = 1;
1965         }
1966
1967         if (n <= 1)
1968                 return res;
1969
1970         /* check for partly redundant values */
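        /* For every Load anticipated in this block check its availability in all
           predecessors: if the value is available in some but not all of them,
           insert a Load into every missing predecessor and merge the values with
           a Phi, making the original Load fully redundant. */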
1971         for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
1972                 memop_t *op = bl->id_2_memop_antic[pos];
1973                 int     have_some, all_same;
1974                 ir_node *first;
1975
1976                 assert(is_Load(op->node));
1977
1978                 if (op->flags & FLAG_KILLED_NODE)
1979                         continue;
1980                 DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
1981
1982                 have_some  = 0;
1983                 all_same   = 1;
1984                 first      = NULL;
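                /* have_some: the value is available in at least one predecessor;
                   all_same:  every predecessor delivers the very same node */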
1985                 for (i = n - 1; i >= 0; --i) {
1986                         ir_node *pred    = get_Block_cfgpred_block(block, i);
1987                         block_t *pred_bl = get_block_entry(pred);
1988                         memop_t *e       = find_address_avail(pred_bl, &op->value);
1989                         ir_mode *mode    = op->value.mode;
1990
1991                         if (e == NULL) {
1992                                 ir_node *block = get_nodes_block(op->value.address);
1993                                 if (! block_dominates(block, pred)) {
1994                                         /* cannot place a copy here */
1995                                         have_some = 0;
1996                                         break;
1997                                 }
1998                                 DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
1999                                 pred_bl->avail = NULL;
2000                                 all_same       = 0;
2001                         } else {
2002                                 if (e->value.mode != mode && !can_convert_to(e->value.mode, mode)) {
2003                                         /* cannot create a Phi due to different modes */
2004                                         have_some = 0;
2005                                         break;
2006                                 }
2007                                 pred_bl->avail = e;
2008                                 have_some      = 1;
2009                                 DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
2010                                 if (first == NULL)
2011                                         first = e->node;
2012                                 else if (first != e->node)
2013                                         all_same = 0;
2014                         }
2015                 }
2016                 if (have_some && !all_same) {
2017                         ir_mode *mode = op->value.mode;
2018                         ir_node **in, *phi;
2019
2020                         NEW_ARR_A(ir_node *, in, n);
2021
2022                         for (i = n - 1; i >= 0; --i) {
2023                                 ir_node *pred    = get_Block_cfgpred_block(block, i);
2024                                 block_t *pred_bl = get_block_entry(pred);
2025
2026                                 if (pred_bl->avail == NULL) {
2027                                         /* create a new Load here to make the Load fully redundant */
2028                                         dbg_info *db       = get_irn_dbg_info(op->node);
2029                                         ir_node  *last_mem = find_last_memory(pred_bl);
2030                                         ir_node  *load, *def;
2031                                         memop_t  *new_op;
2032
2033                                         assert(last_mem != NULL);
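                                        /* construct the Load on the last memory state of the
                                           predecessor block and create its result and memory Projs */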
2034                                         load = new_rd_Load(db, current_ir_graph, pred, last_mem, op->value.address, mode, cons_none);
2035                                         def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
2036                                         DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));
2037
2038                                         new_op                = alloc_memop(load);
2039                                         new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
2040                                         new_op->value.address = op->value.address;
2041                                         new_op->value.id      = op->value.id;
2042                                         new_op->value.mode    = mode;
2043                                         new_op->value.value   = def;
2044
2045                                         new_op->projs[pn_Load_M]   = new_op->mem;
2046                                         new_op->projs[pn_Load_res] = def;
2047
2048                                         new_op->prev            = pred_bl->memop_backward;
2049                                         pred_bl->memop_backward = new_op;
2050
2051                                         if (pred_bl->memop_forward == NULL)
2052                                                 pred_bl->memop_forward = new_op;
2053
2054                                         if (get_nodes_block(last_mem) == pred) {
2055                                                 /* We have added a new last memory op in the pred block.
2056                                                    If pred already had a last mem, reroute all its memory
2057                                                    users. */
2058                                                 reroute_all_mem_users(last_mem, new_op->mem);
2059                                         } else {
2060                                                 /* reroute only the memory users that pass through the pred block */
2061                                                 reroute_mem_through(last_mem, new_op->mem, pred);
2062                                         }
2063
2064                                         /* we added this load at the end, so it will be avail anyway */
2065                                         add_memop_avail(pred_bl, new_op);
2066                                         pred_bl->avail = new_op;
2067                                 }
2068                                 in[i] = conv_to(pred_bl->avail->value.value, mode);
2069                         }
2070                         phi = new_r_Phi(current_ir_graph, block, n, in, mode);
2071                         DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));
2072
2073                         if (get_nodes_block(op->node) == block) {
2074                                 /* The redundant node was in the current block:
2075                                    In that case, do NOT add it to avail_out (only update an existing
2076                                    entry): if it was NOT available although it is executed in this
2077                                    block, it is killed by a later instruction.
2078                                  */
2079                                 memop_t *phi_op = clone_memop_phi(op, phi);
2080
2081                                 update_memop_avail(bl, phi_op);
2082
2083                                 mark_replace_load(op, phi);
2084                         } else {
2085                                 /* The partially redundant Load is anticipated but not in the current block. */
2086                                 memop_t *phi_op = clone_memop_phi(op, phi);
2087
2088                                 add_memop_avail(bl, phi_op);
2089
2090                                 /* propagate it downwards */
2091                                 res = 1;
2092                         }
2093                         /* clear it so we do not find it again in the next iteration */
2094                         rbitset_clear(bl->anticL_in, pos);
2095                 }
2096         }
2097         return res;
2098 }
2099
2100 /**
2101  * Insert Loads upwards.
2102  */
2103 static void insert_Loads_upwards(void) {
2104         int i, need_iter;
2105         block_t *bl;
2106
2107         /* recalculate antic_out and insert Loads */
2108         DB((dbg, LEVEL_2, "Inserting Loads\n"));
2109
2110         i = 0;
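        /* fixpoint iteration: repeat as long as some avail_out set still changes */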
2111         do {
2112                 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
2113
2114                 need_iter = 0;
2115
2116                 /* over all blocks in reverse post order, skip the start block */
2117                 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
2118                         need_iter |= insert_Load(bl);
2119                 }
2120                 ++i;
2121         } while (need_iter);
2122
2123         DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
2124 }
2125
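/**
 * Load/Store optimization: removes redundant Loads and dead Stores and turns
 * partially redundant Loads into fully redundant ones by inserting new Loads
 * into predecessor blocks.
 *
 * @param irg  the graph to optimize
 *
 * @return non-zero if the graph was changed
 */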
2126 int opt_ldst(ir_graph *irg) {
2127         block_t  *bl;
2128         ir_graph *rem = current_ir_graph;
2129
2130         current_ir_graph = irg;
2131
2132         FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
2133 //      firm_dbg_set_mask(dbg, -1);
2134
2135         DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
2136
2137         /* we need landing pads */
2138         remove_critical_cf_edges(irg);
2139
2140         dump_ir_block_graph(irg, "-XXX");
2141
2142         if (get_opt_alias_analysis()) {
2143                 assure_irg_entity_usage_computed(irg);
2144                 assure_irp_globals_entity_usage_computed();
2145         }
2146
2147         obstack_init(&env.obst);
2148         ir_nodemap_init(&env.adr_map);
2149
2150         env.forward     = NULL;
2151         env.backward    = NULL;
2152         env.curr_adr_id = 0;
2153         env.n_mem_ops   = 0;
2154         env.changed     = 0;
2155         env.start_bl    = get_irg_start_block(irg);
2156         env.end_bl      = get_irg_end_block(irg);
2157
2158         assure_doms(irg);
2159         assure_irg_outs(irg);
2160
2161         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
2162
2163         /* first step: allocate block entries. Note that some blocks might be
2164            unreachable here. Using the normal walk ensures that ALL blocks are initialised. */
2165         irg_block_walk_graph(irg, NULL, prepare_blocks, NULL);
2166
2167         /* produce an inverse post-order list for the CFG: this links only reachable
2168            blocks */
2169         irg_out_block_walk(get_irg_start_block(irg), NULL, inverse_post_order, NULL);
2170
2171         bl = get_block_entry(env.end_bl);
2172         if (!(bl->flags & BLK_FLAG_REACHABLE)) {
2173                 /*
2174                  * The end block is NOT reachable due to endless loops
2175                  * or no_return calls.
2176                  * Place the end block last.
2177                  * env.backward points to the last block in the list for this purpose.
2178                  */
2179                 env.backward->forward_next = bl;
2180
2181                 bl->flags |= BLK_FLAG_REACHABLE;
2182         }
2183
2184         /* second step: find and sort all memory ops */
2185         walk_memory_irg(irg, collect_memops, NULL, NULL);
2186
2187         if (env.n_mem_ops == 0) {
2188                 /* no memory ops */
2189                 goto end;
2190         }
2191
2192         /* create the backward links. */
2193         env.backward = NULL;
2194         irg_block_walk_graph(irg, NULL, collect_backward, NULL);
2195
2196         /* link the end block in */
2197         bl = get_block_entry(env.end_bl);
2198         bl->backward_next = env.backward;
2199         env.backward      = bl;
2200
2201         /* check that we really start with the start / end block */
2202         assert(env.forward->block  == env.start_bl);
2203         assert(env.backward->block == env.end_bl);
2204
2205         /* create the address sets: for now, only the existing addresses are allowed,
2206            plus one extra bit needed for the sentinel */
2207         env.rbs_size = env.n_mem_ops + 1;
2208
2209         /* create the current set */
2210         env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
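        /* the highest bit is the sentinel: it is always set, so every scan with
           rbitset_next() terminates there */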
2211         rbitset_set(env.curr_set, env.rbs_size - 1);
2212         env.curr_id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2213         memset(env.curr_id_2_memop, 0, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2214
2215         for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2216                 /* set sentinel bits */
2217                 bl->avail_out  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2218                 rbitset_set(bl->avail_out, env.rbs_size - 1);
2219
2220                 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2221                 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
2222
2223                 bl->anticL_in  = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2224                 rbitset_set(bl->anticL_in, env.rbs_size - 1);
2225
2226                 bl->id_2_memop_antic = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2227                 memset(bl->id_2_memop_antic, 0, env.rbs_size * sizeof(bl->id_2_memop_antic[0]));
2228         }
2229
2230 //      dump_block_list(&env);
2231
2232         calcAvail();
2233         calcAntic();
2234
2235         insert_Loads_upwards();
2236
2237         if (env.changed) {
2238                 /* over all blocks in reverse post order */
2239                 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2240                         do_replacements(bl);
2241                 }
2242
2243                 /* not only invalidate but free the outs: we might have allocated new out
2244                    arrays on our obstack, which is freed below. */
2245                 free_irg_outs(irg);
2246                 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
2247         }
2248 end:
2249
2250         ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
2251         ir_nodemap_destroy(&env.adr_map);
2252         obstack_free(&env.obst, NULL);
2253
2254         dump_ir_block_graph(irg, "-YYY");
2255
2256         current_ir_graph = rem;
2257
2258         return env.changed != 0;
2259 }