Add the * for the type in foreach_pset() automatically.
[libfirm] / ir / opt / ldstopt.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Load/Store optimizations.
23  * @author  Michael Beck
24  */
25 #include "config.h"
26
27 #include <string.h>
28
29 #include "iroptimize.h"
30 #include "irnode_t.h"
31 #include "irgraph_t.h"
32 #include "irmode_t.h"
33 #include "iropt_t.h"
34 #include "ircons_t.h"
35 #include "irgmod.h"
36 #include "irgwalk.h"
37 #include "irtools.h"
38 #include "tv_t.h"
39 #include "dbginfo_t.h"
40 #include "iropt_dbg.h"
41 #include "irflag_t.h"
42 #include "array_t.h"
43 #include "irhooks.h"
44 #include "iredges.h"
45 #include "irpass.h"
46 #include "irmemory.h"
47 #include "irnodehashmap.h"
48 #include "irgopt.h"
49 #include "set.h"
50 #include "be.h"
51 #include "debug.h"
52
53 /** The debug handle. */
54 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
55
56 #undef IMAX
57 #define IMAX(a,b)   ((a) > (b) ? (a) : (b))
58
59 #define MAX_PROJ    IMAX(IMAX((long)pn_Load_max, (long)pn_Store_max), (long)pn_Call_max)
60
61 enum changes_t {
62         DF_CHANGED = 1,       /**< data flow changed */
63         CF_CHANGED = 2,       /**< control flow changed */
64 };
65
66 /**
67  * walker environment
68  */
69 typedef struct walk_env_t {
70         struct obstack obst;          /**< list of all stores */
71         unsigned changes;             /**< a bitmask of graph changes */
72 } walk_env_t;
73
74 /** A Load/Store info. */
75 typedef struct ldst_info_t {
76         ir_node  *projs[MAX_PROJ+1];  /**< list of Proj's of this node */
77         ir_node  *exc_block;          /**< the exception block if available */
78         int      exc_idx;             /**< predecessor index in the exception block */
79         unsigned visited;             /**< visited counter for breaking loops */
80 } ldst_info_t;
81
82 /**
83  * flags for control flow.
84  */
85 enum block_flags_t {
86         BLOCK_HAS_COND = 1,      /**< Block has conditional control flow */
87         BLOCK_HAS_EXC  = 2       /**< Block has exceptional control flow */
88 };
89
90 /**
91  * a Block info.
92  */
93 typedef struct block_info_t {
94         unsigned flags;               /**< flags for the block */
95 } block_info_t;
96
97 /** the master visited flag for loop detection. */
98 static unsigned master_visited = 0;
99
100 #define INC_MASTER()       ++master_visited
101 #define MARK_NODE(info)    (info)->visited = master_visited
102 #define NODE_VISITED(info) (info)->visited >= master_visited
103
104 /**
105  * get the Load/Store info of a node
106  */
107 static ldst_info_t *get_ldst_info(ir_node *node, struct obstack *obst)
108 {
109         ldst_info_t *info = (ldst_info_t*)get_irn_link(node);
110
111         if (! info) {
112                 info = OALLOCZ(obst, ldst_info_t);
113                 set_irn_link(node, info);
114         }
115         return info;
116 }  /* get_ldst_info */
117
118 /**
119  * get the Block info of a node
120  */
121 static block_info_t *get_block_info(ir_node *node, struct obstack *obst)
122 {
123         block_info_t *info = (block_info_t*)get_irn_link(node);
124
125         if (! info) {
126                 info = OALLOCZ(obst, block_info_t);
127                 set_irn_link(node, info);
128         }
129         return info;
130 }  /* get_block_info */
131
132 /**
133  * update the projection info for a Load/Store
134  */
135 static unsigned update_projs(ldst_info_t *info, ir_node *proj)
136 {
137         long nr = get_Proj_proj(proj);
138
139         assert(0 <= nr && nr <= MAX_PROJ && "Wrong proj from LoadStore");
140
141         if (info->projs[nr]) {
142                 /* there is already one, do CSE */
143                 exchange(proj, info->projs[nr]);
144                 return DF_CHANGED;
145         }
146         else {
147                 info->projs[nr] = proj;
148                 return 0;
149         }
150 }  /* update_projs */
151
152 /**
153  * update the exception block info for a Load/Store node.
154  *
155  * @param info   the load/store info struct
156  * @param block  the exception handler block for this load/store
157  * @param pos    the control flow input of the block
158  */
159 static unsigned update_exc(ldst_info_t *info, ir_node *block, int pos)
160 {
161         assert(info->exc_block == NULL && "more than one exception block found");
162
163         info->exc_block = block;
164         info->exc_idx   = pos;
165         return 0;
166 }  /* update_exc */
167
168 /** Return the number of uses of an address node */
169 #define get_irn_n_uses(adr)     get_irn_n_edges(adr)
170
/**
 * walker, collects all Load/Store/Proj nodes
 *
 * walks from Start -> End
 */
static void collect_nodes(ir_node *node, void *env)
{
	walk_env_t  *wenv   = (walk_env_t *)env;
	unsigned     opcode = get_irn_opcode(node);
	ir_node     *pred, *blk, *pred_blk;
	ldst_info_t *ldst_info;

	if (opcode == iro_Proj) {
		pred   = get_Proj_pred(node);
		opcode = get_irn_opcode(pred);

		if (opcode == iro_Load || opcode == iro_Store || opcode == iro_Call) {
			/* record this Proj in the ldst info of its Load/Store/Call,
			 * CSEing duplicate Projs on the fly */
			ldst_info = get_ldst_info(pred, &wenv->obst);

			wenv->changes |= update_projs(ldst_info, node);

			/*
			 * Place the Proj's to the same block as the
			 * predecessor Load. This is always ok and prevents
			 * "non-SSA" form after optimizations if the Proj
			 * is in a wrong block.
			 */
			blk      = get_nodes_block(node);
			pred_blk = get_nodes_block(pred);
			if (blk != pred_blk) {
				wenv->changes |= DF_CHANGED;
				set_nodes_block(node, pred_blk);
			}
		}
	} else if (opcode == iro_Block) {
		int i;

		/* classify the control flow predecessors of this block */
		for (i = get_Block_n_cfgpreds(node) - 1; i >= 0; --i) {
			ir_node      *pred_block, *proj;
			block_info_t *bl_info;
			int          is_exc = 0;

			pred = proj = get_Block_cfgpred(node, i);

			/* look through a Proj to the real control flow source and
			 * check whether we are reached via an exception edge */
			if (is_Proj(proj)) {
				pred   = get_Proj_pred(proj);
				is_exc = is_x_except_Proj(proj);
			}

			/* ignore Bad predecessors, they will be removed later */
			if (is_Bad(pred))
				continue;

			pred_block = get_nodes_block(pred);
			bl_info    = get_block_info(pred_block, &wenv->obst);

			/* mark the source block: exceptional edge from a fragile op,
			 * or conditional control flow from a forking node */
			if (is_fragile_op(pred) && is_exc)
				bl_info->flags |= BLOCK_HAS_EXC;
			else if (is_irn_forking(pred))
				bl_info->flags |= BLOCK_HAS_COND;

			opcode = get_irn_opcode(pred);
			if (is_exc && (opcode == iro_Load || opcode == iro_Store || opcode == iro_Call)) {
				/* remember this block as the exception handler of the
				 * Load/Store/Call, together with our predecessor index */
				ldst_info = get_ldst_info(pred, &wenv->obst);

				wenv->changes |= update_exc(ldst_info, node, i);
			}
		}
	}
}  /* collect_nodes */
241
/**
 * Returns an entity if the address ptr points to a constant one.
 *
 * Walks the address expression down through Sel's and constant
 * Add/Sub offsets until a constant entity (or nothing) is found.
 *
 * @param ptr  the address
 *
 * @return an entity or NULL
 */
static ir_entity *find_constant_entity(ir_node *ptr)
{
	for (;;) {
		if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
			/* reached the address root: the entity itself */
			return get_SymConst_entity(ptr);
		} else if (is_Sel(ptr)) {
			ir_entity *ent = get_Sel_entity(ptr);
			ir_type   *tp  = get_entity_owner(ent);

			/* Do not fiddle with polymorphism. */
			if (is_Class_type(get_entity_owner(ent)) &&
				((get_entity_n_overwrites(ent)    != 0) ||
				(get_entity_n_overwrittenby(ent) != 0)   ) )
				return NULL;

			if (is_Array_type(tp)) {
				/* check bounds: all indexes must be constant and inside
				 * [lower, upper], otherwise no constant value is known */
				int i, n;

				for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
					ir_node   *bound;
					ir_tarval *tlower, *tupper;
					ir_node   *index = get_Sel_index(ptr, i);
					ir_tarval *tv    = computed_value(index);

					/* check if the index is constant */
					if (tv == tarval_bad)
						return NULL;

					bound  = get_array_lower_bound(tp, i);
					tlower = computed_value(bound);
					bound  = get_array_upper_bound(tp, i);
					tupper = computed_value(bound);

					if (tlower == tarval_bad || tupper == tarval_bad)
						return NULL;

					if (tarval_cmp(tv, tlower) == ir_relation_less)
						return NULL;
					if (tarval_cmp(tupper, tv) == ir_relation_less)
						return NULL;

					/* ok, bounds check finished */
				}
			}

			if (get_entity_linkage(ent) & IR_LINKAGE_CONSTANT)
				return ent;

			/* try next */
			ptr = get_Sel_ptr(ptr);
		} else if (is_Add(ptr)) {
			ir_node *l = get_Add_left(ptr);
			ir_node *r = get_Add_right(ptr);

			/* step over a "pointer + constant" addition */
			if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
				ptr = l;
			else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
				ptr = r;
			else
				return NULL;

			/* for now, we support only one addition, reassoc should fold all others */
			if (! is_SymConst(ptr) && !is_Sel(ptr))
				return NULL;
		} else if (is_Sub(ptr)) {
			ir_node *l = get_Sub_left(ptr);
			ir_node *r = get_Sub_right(ptr);

			/* step over a "pointer - constant" subtraction */
			if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
				ptr = l;
			else
				return NULL;
			/* for now, we support only one substraction, reassoc should fold all others */
			if (! is_SymConst(ptr) && !is_Sel(ptr))
				return NULL;
		} else
			return NULL;
	}
}  /* find_constant_entity */
329
330 /**
331  * Return the Selection index of a Sel node from dimension n
332  */
333 static long get_Sel_array_index_long(ir_node *n, int dim)
334 {
335         ir_node *index = get_Sel_index(n, dim);
336         assert(is_Const(index));
337         return get_tarval_long(get_Const_tarval(index));
338 }  /* get_Sel_array_index_long */
339
/**
 * One element of an access path into a compound entity, used to walk
 * initializers from a base SymConst down to an addressed member.
 */
typedef struct path_entry {
	ir_entity         *ent;   /**< the accessed entity (field or array element) */
	struct path_entry *next;  /**< next (inner) path element, NULL terminates the path */
	size_t            index;  /**< member resp. array index inside the compound */
} path_entry;
345
/**
 * Recursively walk an address expression and resolve the constant
 * initializer value it addresses.
 *
 * The recursion builds up an access path (innermost access first in
 * @p next); once the SymConst root is reached the path is replayed
 * against the entity's initializer.
 *
 * @param ptr   the (remaining) address expression
 * @param next  the access path collected so far, NULL for the start
 *
 * @return the initializer value node or NULL if it cannot be determined
 */
static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next)
{
	path_entry       entry, *p;
	ir_entity        *ent, *field;
	ir_initializer_t *initializer;
	ir_tarval        *tv;
	ir_type          *tp;
	size_t           n;

	entry.next = next;
	if (is_SymConst(ptr)) {
		/* found the root */
		ent         = get_SymConst_entity(ptr);
		initializer = get_entity_initializer(ent);
		/* replay the collected access path against the initializer */
		for (p = next; p != NULL;) {
			if (initializer->kind != IR_INITIALIZER_COMPOUND)
				return NULL;
			n  = get_initializer_compound_n_entries(initializer);
			tp = get_entity_type(ent);

			if (is_Array_type(tp)) {
				ent = get_array_element_entity(tp);
				if (ent != p->ent) {
					/* a missing [0] */
					if (0 >= n)
						return NULL;
					initializer = get_initializer_compound_value(initializer, 0);
					continue;
				}
			}
			if (p->index >= n)
				return NULL;
			initializer = get_initializer_compound_value(initializer, p->index);

			ent = p->ent;
			p   = p->next;
		}
		/* descend into remaining array dimensions at index 0 */
		tp = get_entity_type(ent);
		while (is_Array_type(tp)) {
			ent = get_array_element_entity(tp);
			tp = get_entity_type(ent);
			/* a missing [0] */
			n  = get_initializer_compound_n_entries(initializer);
			if (0 >= n)
				return NULL;
			initializer = get_initializer_compound_value(initializer, 0);
		}

		switch (initializer->kind) {
		case IR_INITIALIZER_CONST:
			return get_initializer_const_value(initializer);
		case IR_INITIALIZER_TARVAL:
		case IR_INITIALIZER_NULL:
		default:
			return NULL;
		}
	} else if (is_Sel(ptr)) {
		/* a Sel contributes one path element: its field (and array index) */
		entry.ent = field = get_Sel_entity(ptr);
		tp = get_entity_owner(field);
		if (is_Array_type(tp)) {
			assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
			entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
		} else {
			/* struct access: find the member position of the field */
			size_t i, n_members = get_compound_n_members(tp);
			for (i = 0; i < n_members; ++i) {
				if (get_compound_member(tp, i) == field)
					break;
			}
			if (i >= n_members) {
				/* not found: should NOT happen */
				return NULL;
			}
			entry.index = i;
		}
		return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
	}  else if (is_Add(ptr)) {
		ir_mode  *mode;
		unsigned pos;

		{
			/* split "base + constant offset"; the constant side becomes tv */
			ir_node *l = get_Add_left(ptr);
			ir_node *r = get_Add_right(ptr);
			if (is_Const(r)) {
				ptr = l;
				tv  = get_Const_tarval(r);
			} else {
				ptr = r;
				tv  = get_Const_tarval(l);
			}
		}
ptr_arith:
		mode = get_tarval_mode(tv);

		/* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
		if (is_Sel(ptr)) {
			field = get_Sel_entity(ptr);
		} else {
			field = get_SymConst_entity(ptr);
		}

		/* count needed entries */
		pos = 0;
		for (ent = field;;) {
			tp = get_entity_type(ent);
			if (! is_Array_type(tp))
				break;
			ent = get_array_element_entity(tp);
			++pos;
		}
		/* should be at least ONE entry */
		if (pos == 0)
			return NULL;

		/* allocate the right number of entries */
		NEW_ARR_A(path_entry, p, pos);

		/* fill them up: convert the byte offset into per-dimension indexes
		 * by repeated division with the element size */
		pos = 0;
		for (ent = field;;) {
			unsigned   size;
			ir_tarval *sz, *tv_index, *tlower, *tupper;
			long       index;
			ir_node   *bound;

			tp = get_entity_type(ent);
			if (! is_Array_type(tp))
				break;
			ent = get_array_element_entity(tp);
			p[pos].ent  = ent;
			p[pos].next = &p[pos + 1];

			size = get_type_size_bytes(get_entity_type(ent));
			sz   = new_tarval_from_long(size, mode);

			tv_index = tarval_div(tv, sz);
			tv       = tarval_mod(tv, sz);

			if (tv_index == tarval_bad || tv == tarval_bad)
				return NULL;

			assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
			bound  = get_array_lower_bound(tp, 0);
			tlower = computed_value(bound);
			bound  = get_array_upper_bound(tp, 0);
			tupper = computed_value(bound);

			if (tlower == tarval_bad || tupper == tarval_bad)
				return NULL;

			if (tarval_cmp(tv_index, tlower) == ir_relation_less)
				return NULL;
			if (tarval_cmp(tupper, tv_index) == ir_relation_less)
				return NULL;

			/* ok, bounds check finished */
			index = get_tarval_long(tv_index);
			p[pos].index = index;
			++pos;
		}
		if (! tarval_is_null(tv)) {
			/* hmm, wrong access */
			return NULL;
		}
		p[pos - 1].next = next;
		return rec_find_compound_ent_value(ptr, p);
	} else if (is_Sub(ptr)) {
		ir_node *l = get_Sub_left(ptr);
		ir_node *r = get_Sub_right(ptr);

		/* treat "base - c" as "base + (-c)" and reuse the Add handling */
		ptr = l;
		tv  = get_Const_tarval(r);
		tv  = tarval_neg(tv);
		goto ptr_arith;
	}
	return NULL;
}
522
523 static ir_node *find_compound_ent_value(ir_node *ptr)
524 {
525         return rec_find_compound_ent_value(ptr, NULL);
526 }
527
528 /* forward */
529 static void reduce_adr_usage(ir_node *ptr);
530
531 /**
532  * Update a Load that may have lost its users.
533  */
534 static void handle_load_update(ir_node *load)
535 {
536         ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
537
538         /* do NOT touch volatile loads for now */
539         if (get_Load_volatility(load) == volatility_is_volatile)
540                 return;
541
542         if (! info->projs[pn_Load_res] && ! info->projs[pn_Load_X_except]) {
543                 ir_node *ptr = get_Load_ptr(load);
544                 ir_node *mem = get_Load_mem(load);
545
546                 /* a Load whose value is neither used nor exception checked, remove it */
547                 exchange(info->projs[pn_Load_M], mem);
548                 if (info->projs[pn_Load_X_regular])
549                         exchange(info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
550                 kill_node(load);
551                 reduce_adr_usage(ptr);
552         }
553 }  /* handle_load_update */
554
555 /**
556  * A use of an address node has vanished. Check if this was a Proj
557  * node and update the counters.
558  */
559 static void reduce_adr_usage(ir_node *ptr)
560 {
561         ir_node *pred;
562         if (!is_Proj(ptr))
563                 return;
564         if (get_irn_n_edges(ptr) > 0)
565                 return;
566
567         /* this Proj is dead now */
568         pred = get_Proj_pred(ptr);
569         if (is_Load(pred)) {
570                 ldst_info_t *info = (ldst_info_t*)get_irn_link(pred);
571                 info->projs[get_Proj_proj(ptr)] = NULL;
572
573                 /* this node lost its result proj, handle that */
574                 handle_load_update(pred);
575         }
576 }  /* reduce_adr_usage */
577
578 /**
579  * Check, if an already existing value of mode old_mode can be converted
580  * into the needed one new_mode without loss.
581  */
582 static int can_use_stored_value(ir_mode *old_mode, ir_mode *new_mode)
583 {
584         unsigned old_size;
585         unsigned new_size;
586         if (old_mode == new_mode)
587                 return true;
588
589         old_size = get_mode_size_bits(old_mode);
590         new_size = get_mode_size_bits(new_mode);
591
592         /* if both modes are two-complement ones, we can always convert the
593            Stored value into the needed one. (on big endian machines we currently
594            only support this for modes of same size) */
595         if (old_size >= new_size &&
596                   get_mode_arithmetic(old_mode) == irma_twos_complement &&
597                   get_mode_arithmetic(new_mode) == irma_twos_complement &&
598                   (!be_get_backend_param()->byte_order_big_endian
599                 || old_size == new_size)) {
600                 return true;
601         }
602         return false;
603 }
604
605 /**
606  * Check whether a Call is at least pure, i.e. does only read memory.
607  */
608 static unsigned is_Call_pure(ir_node *call)
609 {
610         ir_type *call_tp = get_Call_type(call);
611         unsigned prop = get_method_additional_properties(call_tp);
612
613         /* check first the call type */
614         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
615                 /* try the called entity */
616                 ir_node *ptr = get_Call_ptr(call);
617
618                 if (is_SymConst_addr_ent(ptr)) {
619                         ir_entity *ent = get_SymConst_entity(ptr);
620
621                         prop = get_entity_additional_properties(ent);
622                 }
623         }
624         return (prop & (mtp_property_const|mtp_property_pure)) != 0;
625 }  /* is_Call_pure */
626
/**
 * Decompose an address into a base pointer and a constant byte offset.
 *
 * Walks down through Add/Sub of constants and Sel's with constant
 * indexes and fixed-layout types, accumulating the offset; stops at
 * the first node it cannot interpret.
 *
 * @param ptr      the address expression
 * @param pOffset  out parameter: the accumulated constant byte offset
 *
 * @return the remaining base pointer
 */
static ir_node *get_base_and_offset(ir_node *ptr, long *pOffset)
{
	ir_mode *mode  = get_irn_mode(ptr);
	long    offset = 0;

	/* TODO: long might not be enough, we should probably use some tarval thingy... */
	for (;;) {
		if (is_Add(ptr)) {
			ir_node *l = get_Add_left(ptr);
			ir_node *r = get_Add_right(ptr);

			/* only "pointer + constant" with unchanged mode is folded */
			if (get_irn_mode(l) != mode || !is_Const(r))
				break;

			offset += get_tarval_long(get_Const_tarval(r));
			ptr     = l;
		} else if (is_Sub(ptr)) {
			ir_node *l = get_Sub_left(ptr);
			ir_node *r = get_Sub_right(ptr);

			if (get_irn_mode(l) != mode || !is_Const(r))
				break;

			offset -= get_tarval_long(get_Const_tarval(r));
			ptr     = l;
		} else if (is_Sel(ptr)) {
			ir_entity *ent = get_Sel_entity(ptr);
			ir_type   *tp  = get_entity_owner(ent);

			if (is_Array_type(tp)) {
				int     size;
				ir_node *index;

				/* only one dimensional arrays yet */
				if (get_Sel_n_indexs(ptr) != 1)
					break;
				index = get_Sel_index(ptr, 0);
				if (! is_Const(index))
					break;

				tp = get_entity_type(ent);
				if (get_type_state(tp) != layout_fixed)
					break;

				/* array access: offset is index times element size */
				size    = get_type_size_bytes(tp);
				offset += size * get_tarval_long(get_Const_tarval(index));
			} else {
				if (get_type_state(tp) != layout_fixed)
					break;
				/* struct access: add the member offset */
				offset += get_entity_offset(ent);
			}
			ptr = get_Sel_ptr(ptr);
		} else
			break;
	}

	*pOffset = offset;
	return ptr;
}
686
/**
 * Try to replace a Load that follows a Store to the same base address
 * by (a part of) the stored value (read-after-write forwarding).
 *
 * The loaded range must lie completely inside the stored range; a Shr
 * and/or Conv is inserted to extract the loaded part if offset or mode
 * differ.
 *
 * @param load           the Load node
 * @param load_base_ptr  base pointer of the Load address
 * @param load_offset    constant byte offset of the Load address
 * @param store          the preceding Store node
 *
 * @return 0 if nothing was changed, otherwise a bitmask of
 *         DF_CHANGED/CF_CHANGED
 */
static int try_load_after_store(ir_node *load,
		ir_node *load_base_ptr, long load_offset, ir_node *store)
{
	ldst_info_t *info;
	ir_node *store_ptr      = get_Store_ptr(store);
	long     store_offset;
	ir_node *store_base_ptr = get_base_and_offset(store_ptr, &store_offset);
	ir_node *store_value;
	ir_mode *store_mode;
	ir_node *load_ptr;
	ir_mode *load_mode;
	long     load_mode_len;
	long     store_mode_len;
	long     delta;
	int      res;

	/* forwarding only works when both addresses share the same base */
	if (load_base_ptr != store_base_ptr)
		return 0;

	load_mode      = get_Load_mode(load);
	load_mode_len  = get_mode_size_bytes(load_mode);
	store_mode     = get_irn_mode(get_Store_value(store));
	store_mode_len = get_mode_size_bytes(store_mode);
	delta          = load_offset - store_offset;
	store_value    = get_Store_value(store);

	if (delta != 0 || store_mode != load_mode) {
		/* TODO: implement for big-endian */
		/* the loaded bytes must be fully inside the stored bytes */
		if (delta < 0 || delta + load_mode_len > store_mode_len
				|| (be_get_backend_param()->byte_order_big_endian
				    && load_mode_len != store_mode_len))
			return 0;

		/* partial reads are only safe for twos-complement modes */
		if (get_mode_arithmetic(store_mode) != irma_twos_complement ||
			get_mode_arithmetic(load_mode)  != irma_twos_complement)
			return 0;


		/* produce a shift to adjust offset delta */
		if (delta > 0) {
			ir_node *cnst;
			ir_graph *irg = get_irn_irg(load);

			cnst        = new_r_Const_long(irg, mode_Iu, delta * 8);
			store_value = new_r_Shr(get_nodes_block(load),
									store_value, cnst, store_mode);
		}

		/* add an convert if needed */
		if (store_mode != load_mode) {
			store_value = new_r_Conv(get_nodes_block(load), store_value, load_mode);
		}
	}

	DBG_OPT_RAW(load, store_value);

	info = (ldst_info_t*)get_irn_link(load);
	if (info->projs[pn_Load_M])
		exchange(info->projs[pn_Load_M], get_Load_mem(load));

	res = 0;
	/* no exception */
	if (info->projs[pn_Load_X_except]) {
		ir_graph *irg = get_irn_irg(load);
		exchange( info->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
		res |= CF_CHANGED;
	}
	if (info->projs[pn_Load_X_regular]) {
		exchange( info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
		res |= CF_CHANGED;
	}

	if (info->projs[pn_Load_res])
		exchange(info->projs[pn_Load_res], store_value);

	load_ptr = get_Load_ptr(load);
	kill_node(load);
	reduce_adr_usage(load_ptr);
	return res | DF_CHANGED;
}
767
768 /**
769  * Follow the memory chain as long as there are only Loads,
770  * alias free Stores, and constant Calls and try to replace the
771  * current Load by a previous ones.
772  * Note that in unreachable loops it might happen that we reach
773  * load again, as well as we can fall into a cycle.
774  * We break such cycles using a special visited flag.
775  *
776  * INC_MASTER() must be called before dive into
777  */
778 static unsigned follow_Mem_chain(ir_node *load, ir_node *curr)
779 {
780         unsigned    res = 0;
781         ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
782         ir_node     *pred;
783         ir_node     *ptr       = get_Load_ptr(load);
784         ir_node     *mem       = get_Load_mem(load);
785         ir_mode     *load_mode = get_Load_mode(load);
786
787         for (pred = curr; load != pred; ) {
788                 ldst_info_t *pred_info = (ldst_info_t*)get_irn_link(pred);
789
790                 /*
791                  * a Load immediately after a Store -- a read after write.
792                  * We may remove the Load, if both Load & Store does not have an
793                  * exception handler OR they are in the same Block. In the latter
794                  * case the Load cannot throw an exception when the previous Store was
795                  * quiet.
796                  *
797                  * Why we need to check for Store Exception? If the Store cannot
798                  * be executed (ROM) the exception handler might simply jump into
799                  * the load Block :-(
800                  * We could make it a little bit better if we would know that the
801                  * exception handler of the Store jumps directly to the end...
802                  */
803                 if (is_Store(pred) && ((pred_info->projs[pn_Store_X_except] == NULL
804                                 && info->projs[pn_Load_X_except] == NULL)
805                                 || get_nodes_block(load) == get_nodes_block(pred)))
806                 {
807                         long    load_offset;
808                         ir_node *base_ptr = get_base_and_offset(ptr, &load_offset);
809                         int     changes   = try_load_after_store(load, base_ptr, load_offset, pred);
810
811                         if (changes != 0)
812                                 return res | changes;
813                 } else if (is_Load(pred) && get_Load_ptr(pred) == ptr &&
814                            can_use_stored_value(get_Load_mode(pred), load_mode)) {
815                         /*
816                          * a Load after a Load -- a read after read.
817                          * We may remove the second Load, if it does not have an exception
818                          * handler OR they are in the same Block. In the later case
819                          * the Load cannot throw an exception when the previous Load was
820                          * quiet.
821                          *
822                          * Here, there is no need to check if the previous Load has an
823                          * exception hander because they would have exact the same
824                          * exception...
825                          *
826                          * TODO: implement load-after-load with different mode for big
827                          *       endian
828                          */
829                         if (info->projs[pn_Load_X_except] == NULL
830                                         || get_nodes_block(load) == get_nodes_block(pred)) {
831                                 ir_node *value;
832
833                                 DBG_OPT_RAR(load, pred);
834
835                                 /* the result is used */
836                                 if (info->projs[pn_Load_res]) {
837                                         if (pred_info->projs[pn_Load_res] == NULL) {
838                                                 /* create a new Proj again */
839                                                 pred_info->projs[pn_Load_res] = new_r_Proj(pred, get_Load_mode(pred), pn_Load_res);
840                                         }
841                                         value = pred_info->projs[pn_Load_res];
842
843                                         /* add an convert if needed */
844                                         if (get_Load_mode(pred) != load_mode) {
845                                                 value = new_r_Conv(get_nodes_block(load), value, load_mode);
846                                         }
847
848                                         exchange(info->projs[pn_Load_res], value);
849                                 }
850
851                                 if (info->projs[pn_Load_M])
852                                         exchange(info->projs[pn_Load_M], mem);
853
854                                 /* no exception */
855                                 if (info->projs[pn_Load_X_except]) {
856                                         ir_graph *irg = get_irn_irg(load);
857                                         exchange(info->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
858                                         res |= CF_CHANGED;
859                                 }
860                                 if (info->projs[pn_Load_X_regular]) {
861                                         exchange( info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
862                                         res |= CF_CHANGED;
863                                 }
864
865                                 kill_node(load);
866                                 reduce_adr_usage(ptr);
867                                 return res |= DF_CHANGED;
868                         }
869                 }
870
871                 if (is_Store(pred)) {
872                         /* check if we can pass through this store */
873                         ir_alias_relation rel = get_alias_relation(
874                                 get_Store_ptr(pred),
875                                 get_irn_mode(get_Store_value(pred)),
876                                 ptr, load_mode);
877                         /* if the might be an alias, we cannot pass this Store */
878                         if (rel != ir_no_alias)
879                                 break;
880                         pred = skip_Proj(get_Store_mem(pred));
881                 } else if (is_Load(pred)) {
882                         pred = skip_Proj(get_Load_mem(pred));
883                 } else if (is_Call(pred)) {
884                         if (is_Call_pure(pred)) {
885                                 /* The called graph is at least pure, so there are no Store's
886                                    in it. We can handle it like a Load and skip it. */
887                                 pred = skip_Proj(get_Call_mem(pred));
888                         } else {
889                                 /* there might be Store's in the graph, stop here */
890                                 break;
891                         }
892                 } else {
893                         /* follow only Load chains */
894                         break;
895                 }
896
897                 /* check for cycles */
898                 if (NODE_VISITED(pred_info))
899                         break;
900                 MARK_NODE(pred_info);
901         }
902
903         if (is_Sync(pred)) {
904                 int i;
905
906                 /* handle all Sync predecessors */
907                 for (i = get_Sync_n_preds(pred) - 1; i >= 0; --i) {
908                         res |= follow_Mem_chain(load, skip_Proj(get_Sync_pred(pred, i)));
909                         if (res)
910                                 return res;
911                 }
912         }
913
914         return res;
915 }  /* follow_Mem_chain */
916
917 /*
918  * Check if we can replace the load by a given const from
919  * the const code irg.
920  */
921 ir_node *can_replace_load_by_const(const ir_node *load, ir_node *c)
922 {
923         ir_mode  *c_mode = get_irn_mode(c);
924         ir_mode  *l_mode = get_Load_mode(load);
925         ir_node  *block  = get_nodes_block(load);
926         dbg_info *dbgi   = get_irn_dbg_info(load);
927         ir_node  *res    = copy_const_value(dbgi, c, block);
928
929         if (c_mode != l_mode) {
930                 /* check, if the mode matches OR can be easily converted info */
931                 if (is_reinterpret_cast(c_mode, l_mode)) {
932                         /* copy the value from the const code irg and cast it */
933                         res = new_rd_Conv(dbgi, block, res, l_mode);
934                 } else {
935                         return NULL;
936                 }
937         }
938         return res;
939 }
940
941 /**
942  * optimize a Load
943  *
944  * @param load  the Load node
945  */
946 static unsigned optimize_load(ir_node *load)
947 {
948         ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
949         ir_node     *mem, *ptr, *value;
950         ir_entity   *ent;
951         long        dummy;
952         unsigned    res = 0;
953
954         /* do NOT touch volatile loads for now */
955         if (get_Load_volatility(load) == volatility_is_volatile)
956                 return 0;
957
958         /* the address of the load to be optimized */
959         ptr = get_Load_ptr(load);
960
961         /* The mem of the Load. Must still be returned after optimization. */
962         mem = get_Load_mem(load);
963
964         if (info->projs[pn_Load_res] == NULL
965                         && info->projs[pn_Load_X_except] == NULL) {
966                 /* the value is never used and we don't care about exceptions, remove */
967                 exchange(info->projs[pn_Load_M], mem);
968
969                 if (info->projs[pn_Load_X_regular]) {
970                         /* should not happen, but if it does, remove it */
971                         exchange(info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
972                         res |= CF_CHANGED;
973                 }
974                 kill_node(load);
975                 reduce_adr_usage(ptr);
976                 return res | DF_CHANGED;
977         }
978
979         value = NULL;
980         /* check if we can determine the entity that will be loaded */
981         ent = find_constant_entity(ptr);
982         if (ent != NULL
983                         && get_entity_visibility(ent) != ir_visibility_external) {
984                 /* a static allocation that is not external: there should be NO
985                  * exception when loading even if we cannot replace the load itself.
986                  */
987
988                 /* no exception, clear the info field as it might be checked later again */
989                 if (info->projs[pn_Load_X_except]) {
990                         ir_graph *irg = get_irn_irg(load);
991                         exchange(info->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
992                         info->projs[pn_Load_X_except] = NULL;
993                         res |= CF_CHANGED;
994                 }
995                 if (info->projs[pn_Load_X_regular]) {
996                         exchange(info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
997                         info->projs[pn_Load_X_regular] = NULL;
998                         res |= CF_CHANGED;
999                 }
1000
1001                 if (get_entity_linkage(ent) & IR_LINKAGE_CONSTANT) {
1002                         if (has_entity_initializer(ent)) {
1003                                 /* new style initializer */
1004                                 value = find_compound_ent_value(ptr);
1005                         }
1006                         if (value != NULL) {
1007                                 ir_graph *irg = get_irn_irg(load);
1008                                 value = can_replace_load_by_const(load, value);
1009                                 if (value != NULL && is_Sel(ptr)) {
1010                                         /* frontend has inserted masking operations after bitfield accesses,
1011                                          * so we might have to shift the const. */
1012                                         unsigned char bit_offset = get_entity_offset_bits_remainder(get_Sel_entity(ptr));
1013                                         ir_tarval *tv_old = get_Const_tarval(value);
1014                                         ir_tarval *tv_offset = new_tarval_from_long(bit_offset, mode_Bu);
1015                                         ir_tarval *tv_new = tarval_shl(tv_old, tv_offset);
1016                                         value = new_r_Const(irg, tv_new);
1017                                 }
1018                         }
1019                 }
1020         }
1021         if (value != NULL) {
1022                 /* we completely replace the load by this value */
1023                 if (info->projs[pn_Load_X_except]) {
1024                         ir_graph *irg = get_irn_irg(load);
1025                         exchange(info->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
1026                         info->projs[pn_Load_X_except] = NULL;
1027                         res |= CF_CHANGED;
1028                 }
1029                 if (info->projs[pn_Load_X_regular]) {
1030                         exchange(info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
1031                         info->projs[pn_Load_X_regular] = NULL;
1032                         res |= CF_CHANGED;
1033                 }
1034                 if (info->projs[pn_Load_M]) {
1035                         exchange(info->projs[pn_Load_M], mem);
1036                         res |= DF_CHANGED;
1037                 }
1038                 if (info->projs[pn_Load_res]) {
1039                         exchange(info->projs[pn_Load_res], value);
1040                         res |= DF_CHANGED;
1041                 }
1042                 kill_node(load);
1043                 reduce_adr_usage(ptr);
1044                 return res;
1045         }
1046
1047         /* Check, if the address of this load is used more than once.
1048          * If not, more load cannot be removed in any case. */
1049         if (get_irn_n_uses(ptr) <= 1 && get_irn_n_uses(get_base_and_offset(ptr, &dummy)) <= 1)
1050                 return res;
1051
1052         /*
1053          * follow the memory chain as long as there are only Loads
1054          * and try to replace current Load or Store by a previous one.
1055          * Note that in unreachable loops it might happen that we reach
1056          * load again, as well as we can fall into a cycle.
1057          * We break such cycles using a special visited flag.
1058          */
1059         INC_MASTER();
1060         res = follow_Mem_chain(load, skip_Proj(mem));
1061         return res;
1062 }  /* optimize_load */
1063
1064 /**
1065  * Check whether a value of mode new_mode would completely overwrite a value
1066  * of mode old_mode in memory.
1067  */
1068 static int is_completely_overwritten(ir_mode *old_mode, ir_mode *new_mode)
1069 {
1070         return get_mode_size_bits(new_mode) >= get_mode_size_bits(old_mode);
1071 }  /* is_completely_overwritten */
1072
1073 /**
1074  * Check whether small is a part of large (starting at same address).
1075  */
1076 static int is_partially_same(ir_node *small, ir_node *large)
1077 {
1078         ir_mode *sm = get_irn_mode(small);
1079         ir_mode *lm = get_irn_mode(large);
1080
1081         /* FIXME: Check endianness */
1082         return is_Conv(small) && get_Conv_op(small) == large
1083             && get_mode_size_bytes(sm) < get_mode_size_bytes(lm)
1084             && get_mode_arithmetic(sm) == irma_twos_complement
1085             && get_mode_arithmetic(lm) == irma_twos_complement;
1086 }  /* is_partially_same */
1087
1088 /**
1089  * follow the memory chain as long as there are only Loads and alias free Stores.
1090  *
1091  * INC_MASTER() must be called before dive into
1092  */
1093 static unsigned follow_Mem_chain_for_Store(ir_node *store, ir_node *curr)
1094 {
1095         unsigned res = 0;
1096         ldst_info_t *info = (ldst_info_t*)get_irn_link(store);
1097         ir_node *pred;
1098         ir_node *ptr = get_Store_ptr(store);
1099         ir_node *mem = get_Store_mem(store);
1100         ir_node *value = get_Store_value(store);
1101         ir_mode *mode  = get_irn_mode(value);
1102         ir_node *block = get_nodes_block(store);
1103
1104         for (pred = curr; pred != store;) {
1105                 ldst_info_t *pred_info = (ldst_info_t*)get_irn_link(pred);
1106
1107                 /*
1108                  * BEWARE: one might think that checking the modes is useless, because
1109                  * if the pointers are identical, they refer to the same object.
1110                  * This is only true in strong typed languages, not is C were the following
1111                  * is possible *(ir_type1 *)p = a; *(ir_type2 *)p = b ...
1112                  * However, if the size of the mode that is written is bigger or equal the
1113                  * size of the old one, the old value is completely overwritten and can be
1114                  * killed ...
1115                  */
1116                 if (is_Store(pred) && get_Store_ptr(pred) == ptr &&
1117             get_nodes_block(pred) == block) {
1118                         /*
1119                          * a Store after a Store in the same Block -- a write after write.
1120                          */
1121
1122                         /*
1123                          * We may remove the first Store, if the old value is completely
1124                          * overwritten or the old value is a part of the new value,
1125                          * and if it does not have an exception handler.
1126                          *
1127                          * TODO: What, if both have the same exception handler ???
1128                          */
1129                         if (get_Store_volatility(pred) != volatility_is_volatile
1130                                 && !pred_info->projs[pn_Store_X_except]) {
1131                                 ir_node *predvalue = get_Store_value(pred);
1132                                 ir_mode *predmode  = get_irn_mode(predvalue);
1133
1134                                 if (is_completely_overwritten(predmode, mode)
1135                                         || is_partially_same(predvalue, value)) {
1136                                         DBG_OPT_WAW(pred, store);
1137                                         exchange(pred_info->projs[pn_Store_M], get_Store_mem(pred));
1138                                         kill_node(pred);
1139                                         reduce_adr_usage(ptr);
1140                                         return DF_CHANGED;
1141                                 }
1142                         }
1143
1144                         /*
1145                          * We may remove the Store, if the old value already contains
1146                          * the new value, and if it does not have an exception handler.
1147                          *
1148                          * TODO: What, if both have the same exception handler ???
1149                          */
1150                         if (get_Store_volatility(store) != volatility_is_volatile
1151                                 && !info->projs[pn_Store_X_except]) {
1152                                 ir_node *predvalue = get_Store_value(pred);
1153
1154                                 if (is_partially_same(value, predvalue)) {
1155                                         DBG_OPT_WAW(pred, store);
1156                                         exchange(info->projs[pn_Store_M], mem);
1157                                         kill_node(store);
1158                                         reduce_adr_usage(ptr);
1159                                         return DF_CHANGED;
1160                                 }
1161                         }
1162                 } else if (is_Load(pred) && get_Load_ptr(pred) == ptr &&
1163                            value == pred_info->projs[pn_Load_res]) {
1164                         /*
1165                          * a Store of a value just loaded from the same address
1166                          * -- a write after read.
1167                          * We may remove the Store, if it does not have an exception
1168                          * handler.
1169                          */
1170                         if (! info->projs[pn_Store_X_except]) {
1171                                 DBG_OPT_WAR(store, pred);
1172                                 exchange(info->projs[pn_Store_M], mem);
1173                                 kill_node(store);
1174                                 reduce_adr_usage(ptr);
1175                                 return DF_CHANGED;
1176                         }
1177                 }
1178
1179                 if (is_Store(pred)) {
1180                         /* check if we can pass through this store */
1181                         ir_alias_relation rel = get_alias_relation(
1182                                 get_Store_ptr(pred),
1183                                 get_irn_mode(get_Store_value(pred)),
1184                                 ptr, mode);
1185                         /* if the might be an alias, we cannot pass this Store */
1186                         if (rel != ir_no_alias)
1187                                 break;
1188                         pred = skip_Proj(get_Store_mem(pred));
1189                 } else if (is_Load(pred)) {
1190                         ir_alias_relation rel = get_alias_relation(
1191                                 get_Load_ptr(pred), get_Load_mode(pred),
1192                                 ptr, mode);
1193                         if (rel != ir_no_alias)
1194                                 break;
1195
1196                         pred = skip_Proj(get_Load_mem(pred));
1197                 } else {
1198                         /* follow only Load chains */
1199                         break;
1200                 }
1201
1202                 /* check for cycles */
1203                 if (NODE_VISITED(pred_info))
1204                         break;
1205                 MARK_NODE(pred_info);
1206         }
1207
1208         if (is_Sync(pred)) {
1209                 int i;
1210
1211                 /* handle all Sync predecessors */
1212                 for (i = get_Sync_n_preds(pred) - 1; i >= 0; --i) {
1213                         res |= follow_Mem_chain_for_Store(store, skip_Proj(get_Sync_pred(pred, i)));
1214                         if (res)
1215                                 break;
1216                 }
1217         }
1218         return res;
1219 }  /* follow_Mem_chain_for_Store */
1220
1221 /** find entity used as base for an address calculation */
1222 static ir_entity *find_entity(ir_node *ptr)
1223 {
1224         switch (get_irn_opcode(ptr)) {
1225         case iro_SymConst:
1226                 return get_SymConst_entity(ptr);
1227         case iro_Sel: {
1228                 ir_node *pred = get_Sel_ptr(ptr);
1229                 if (get_irg_frame(get_irn_irg(ptr)) == pred)
1230                         return get_Sel_entity(ptr);
1231
1232                 return find_entity(pred);
1233         }
1234         case iro_Sub:
1235         case iro_Add: {
1236                 ir_node *left = get_binop_left(ptr);
1237                 ir_node *right;
1238                 if (mode_is_reference(get_irn_mode(left)))
1239                         return find_entity(left);
1240                 right = get_binop_right(ptr);
1241                 if (mode_is_reference(get_irn_mode(right)))
1242                         return find_entity(right);
1243                 return NULL;
1244         }
1245         default:
1246                 return NULL;
1247         }
1248 }
1249
1250 /**
1251  * optimize a Store
1252  *
1253  * @param store  the Store node
1254  */
1255 static unsigned optimize_store(ir_node *store)
1256 {
1257         ir_node   *ptr;
1258         ir_node   *mem;
1259         ir_entity *entity;
1260
1261         if (get_Store_volatility(store) == volatility_is_volatile)
1262                 return 0;
1263
1264         ptr    = get_Store_ptr(store);
1265         entity = find_entity(ptr);
1266
1267         /* a store to an entity which is never read is unnecessary */
1268         if (entity != NULL && !(get_entity_usage(entity) & ir_usage_read)) {
1269                 ldst_info_t *info = (ldst_info_t*)get_irn_link(store);
1270                 if (info->projs[pn_Store_X_except] == NULL) {
1271                         DB((dbg, LEVEL_1, "  Killing useless %+F to never read entity %+F\n", store, entity));
1272                         exchange(info->projs[pn_Store_M], get_Store_mem(store));
1273                         kill_node(store);
1274                         reduce_adr_usage(ptr);
1275                         return DF_CHANGED;
1276                 }
1277         }
1278
1279         /* Check, if the address of this Store is used more than once.
1280          * If not, this Store cannot be removed in any case. */
1281         if (get_irn_n_uses(ptr) <= 1)
1282                 return 0;
1283
1284         mem = get_Store_mem(store);
1285
1286         /* follow the memory chain as long as there are only Loads */
1287         INC_MASTER();
1288
1289         return follow_Mem_chain_for_Store(store, skip_Proj(mem));
1290 }  /* optimize_store */
1291
1292 /**
1293  * walker, optimizes Phi after Stores to identical places:
1294  * Does the following optimization:
1295  * @verbatim
1296  *
1297  *   val1   val2   val3          val1  val2  val3
1298  *    |      |      |               \    |    /
1299  *  Store  Store  Store              \   |   /
1300  *      \    |    /                   PhiData
1301  *       \   |   /                       |
1302  *        \  |  /                      Store
1303  *          PhiM
1304  *
1305  * @endverbatim
1306  * This reduces the number of stores and allows for predicated execution.
1307  * Moves Stores back to the end of a function which may be bad.
1308  *
1309  * This is only possible if the predecessor blocks have only one successor.
1310  */
1311 static unsigned optimize_phi(ir_node *phi, walk_env_t *wenv)
1312 {
1313         int i, n;
1314         ir_node *store, *ptr, *block, *phi_block, *phiM, *phiD, *exc, *projM;
1315 #ifdef DO_CACHEOPT
1316         ir_node *old_store;
1317 #endif
1318         ir_mode *mode;
1319         ir_node **inM, **inD, **projMs;
1320         int *idx;
1321         dbg_info *db = NULL;
1322         ldst_info_t *info;
1323         block_info_t *bl_info;
1324         unsigned res = 0;
1325
1326         /* Must be a memory Phi */
1327         if (get_irn_mode(phi) != mode_M)
1328                 return 0;
1329
1330         n = get_Phi_n_preds(phi);
1331         if (n <= 0)
1332                 return 0;
1333
1334         /* must be only one user */
1335         projM = get_Phi_pred(phi, 0);
1336         if (get_irn_n_edges(projM) != 1)
1337                 return 0;
1338
1339         store = skip_Proj(projM);
1340 #ifdef DO_CACHEOPT
1341         old_store = store;
1342 #endif
1343         if (!is_Store(store))
1344                 return 0;
1345
1346         block = get_nodes_block(store);
1347
1348         /* check if the block is post dominated by Phi-block
1349            and has no exception exit */
1350         bl_info = (block_info_t*)get_irn_link(block);
1351         if (bl_info->flags & BLOCK_HAS_EXC)
1352                 return 0;
1353
1354         phi_block = get_nodes_block(phi);
1355         if (! block_strictly_postdominates(phi_block, block))
1356                 return 0;
1357
1358         /* this is the address of the store */
1359         ptr  = get_Store_ptr(store);
1360         mode = get_irn_mode(get_Store_value(store));
1361         info = (ldst_info_t*)get_irn_link(store);
1362         exc  = info->exc_block;
1363
1364         for (i = 1; i < n; ++i) {
1365                 ir_node *pred = get_Phi_pred(phi, i);
1366
1367                 if (get_irn_n_edges(pred) != 1)
1368                         return 0;
1369
1370                 pred = skip_Proj(pred);
1371                 if (!is_Store(pred))
1372                         return 0;
1373
1374                 if (ptr != get_Store_ptr(pred) || mode != get_irn_mode(get_Store_value(pred)))
1375                         return 0;
1376
1377                 info = (ldst_info_t*)get_irn_link(pred);
1378
1379                 /* check, if all stores have the same exception flow */
1380                 if (exc != info->exc_block)
1381                         return 0;
1382
1383                 block = get_nodes_block(pred);
1384
1385                 /* check if the block is post dominated by Phi-block
1386                    and has no exception exit. Note that block must be different from
1387                    Phi-block, else we would move a Store from end End of a block to its
1388                    Start... */
1389                 bl_info = (block_info_t*)get_irn_link(block);
1390                 if (bl_info->flags & BLOCK_HAS_EXC)
1391                         return 0;
1392                 if (block == phi_block || ! block_postdominates(phi_block, block))
1393                         return 0;
1394         }
1395
1396         /*
1397          * ok, when we are here, we found all predecessors of a Phi that
1398          * are Stores to the same address and size. That means whatever
1399          * we do before we enter the block of the Phi, we do a Store.
1400          * So, we can move the Store to the current block:
1401          *
1402          *   val1    val2    val3          val1  val2  val3
1403          *    |       |       |               \    |    /
1404          * | Str | | Str | | Str |             \   |   /
1405          *      \     |     /                   PhiData
1406          *       \    |    /                       |
1407          *        \   |   /                       Str
1408          *           PhiM
1409          *
1410          * Is only allowed if the predecessor blocks have only one successor.
1411          */
1412
1413         NEW_ARR_A(ir_node *, projMs, n);
1414         NEW_ARR_A(ir_node *, inM, n);
1415         NEW_ARR_A(ir_node *, inD, n);
1416         NEW_ARR_A(int, idx, n);
1417
1418         /* Prepare: Collect all Store nodes.  We must do this
1419            first because we otherwise may loose a store when exchanging its
1420            memory Proj.
1421          */
1422         for (i = n - 1; i >= 0; --i) {
1423                 ir_node *store;
1424
1425                 projMs[i] = get_Phi_pred(phi, i);
1426                 assert(is_Proj(projMs[i]));
1427
1428                 store = get_Proj_pred(projMs[i]);
1429                 info  = (ldst_info_t*)get_irn_link(store);
1430
1431                 inM[i] = get_Store_mem(store);
1432                 inD[i] = get_Store_value(store);
1433                 idx[i] = info->exc_idx;
1434         }
1435         block = get_nodes_block(phi);
1436
1437         /* second step: create a new memory Phi */
1438         phiM = new_rd_Phi(get_irn_dbg_info(phi), block, n, inM, mode_M);
1439
1440         /* third step: create a new data Phi */
1441         phiD = new_rd_Phi(get_irn_dbg_info(phi), block, n, inD, mode);
1442
1443         /* rewire memory and kill the node */
1444         for (i = n - 1; i >= 0; --i) {
1445                 ir_node *proj  = projMs[i];
1446
1447                 if (is_Proj(proj)) {
1448                         ir_node *store = get_Proj_pred(proj);
1449                         exchange(proj, inM[i]);
1450                         kill_node(store);
1451                 }
1452         }
1453
1454         /* fourth step: create the Store */
1455         store = new_rd_Store(db, block, phiM, ptr, phiD, cons_none);
1456 #ifdef DO_CACHEOPT
1457         co_set_irn_name(store, co_get_irn_ident(old_store));
1458 #endif
1459
1460         projM = new_rd_Proj(NULL, store, mode_M, pn_Store_M);
1461
1462         info = get_ldst_info(store, &wenv->obst);
1463         info->projs[pn_Store_M] = projM;
1464
1465         /* fifths step: repair exception flow */
1466         if (exc) {
1467                 ir_node *projX = new_rd_Proj(NULL, store, mode_X, pn_Store_X_except);
1468
1469                 info->projs[pn_Store_X_except] = projX;
1470                 info->exc_block                = exc;
1471                 info->exc_idx                  = idx[0];
1472
1473                 for (i = 0; i < n; ++i) {
1474                         set_Block_cfgpred(exc, idx[i], projX);
1475                 }
1476
1477                 if (n > 1) {
1478                         /* the exception block should be optimized as some inputs are identical now */
1479                 }
1480
1481                 res |= CF_CHANGED;
1482         }
1483
1484         /* sixth step: replace old Phi */
1485         exchange(phi, projM);
1486
1487         return res | DF_CHANGED;
1488 }  /* optimize_phi */
1489
1490 /**
1491  * walker, do the optimizations
1492  */
1493 static void do_load_store_optimize(ir_node *n, void *env)
1494 {
1495         walk_env_t *wenv = (walk_env_t*)env;
1496
1497         switch (get_irn_opcode(n)) {
1498
1499         case iro_Load:
1500                 wenv->changes |= optimize_load(n);
1501                 break;
1502
1503         case iro_Store:
1504                 wenv->changes |= optimize_store(n);
1505                 break;
1506
1507         case iro_Phi:
1508                 wenv->changes |= optimize_phi(n, wenv);
1509                 break;
1510
1511         default:
1512                 break;
1513         }
1514 }  /* do_load_store_optimize */
1515
/** A scc: a strongly connected component, represented as a single-linked
 *  list of its member nodes chained through node_entry::next. */
typedef struct scc {
        ir_node *head;      /**< the head of the list */
} scc;
1520
/** A node entry: per-node bookkeeping for Tarjan's SCC algorithm. */
typedef struct node_entry {
        unsigned DFSnum;    /**< the DFS number of this node */
        unsigned low;       /**< the low number of this node */
        int      in_stack;  /**< flag, set if the node is on the stack */
        ir_node  *next;     /**< link to the next node in the same scc */
        scc      *pscc;     /**< the scc of this node */
        unsigned POnum;     /**< the post order number for blocks */
} node_entry;
1530
/** A loop entry: the environment for the SCC-based loop load/store
 *  optimization. */
typedef struct loop_env {
        ir_nodehashmap_t map;          /**< maps ir_node to its node_entry */
        struct obstack   obst;         /**< obstack for node_entry/scc/phi_entry allocations */
        ir_node          **stack;      /**< the node stack */
        size_t           tos;          /**< tos index */
        unsigned         nextDFSnum;   /**< the current DFS number */
        unsigned         POnum;        /**< current post order number */

        unsigned         changes;      /**< a bitmask of graph changes */
} loop_env;
1542
1543 /**
1544 * Gets the node_entry of a node
1545 */
1546 static node_entry *get_irn_ne(ir_node *irn, loop_env *env)
1547 {
1548         node_entry *e = (node_entry*)ir_nodehashmap_get(&env->map, irn);
1549
1550         if (e == NULL) {
1551                 e = OALLOC(&env->obst, node_entry);
1552                 memset(e, 0, sizeof(*e));
1553                 ir_nodehashmap_insert(&env->map, irn, e);
1554         }
1555         return e;
1556 }  /* get_irn_ne */
1557
1558 /**
1559  * Push a node onto the stack.
1560  *
1561  * @param env   the loop environment
1562  * @param n     the node to push
1563  */
1564 static void push(loop_env *env, ir_node *n)
1565 {
1566         node_entry *e;
1567
1568         if (env->tos == ARR_LEN(env->stack)) {
1569                 size_t nlen = ARR_LEN(env->stack) * 2;
1570                 ARR_RESIZE(ir_node *, env->stack, nlen);
1571         }
1572         env->stack[env->tos++] = n;
1573         e = get_irn_ne(n, env);
1574         e->in_stack = 1;
1575 }  /* push */
1576
1577 /**
1578  * pop a node from the stack
1579  *
1580  * @param env   the loop environment
1581  *
1582  * @return  The topmost node
1583  */
1584 static ir_node *pop(loop_env *env)
1585 {
1586         ir_node *n = env->stack[--env->tos];
1587         node_entry *e = get_irn_ne(n, env);
1588
1589         e->in_stack = 0;
1590         return n;
1591 }  /* pop */
1592
1593 /**
1594  * Check if irn is a region constant.
1595  * The block or irn must strictly dominate the header block.
1596  *
1597  * @param irn           the node to check
1598  * @param header_block  the header block of the induction variable
1599  */
1600 static int is_rc(ir_node *irn, ir_node *header_block)
1601 {
1602         ir_node *block = get_nodes_block(irn);
1603
1604         return (block != header_block) && block_dominates(block, header_block);
1605 }  /* is_rc */
1606
/** A list entry recording a loop-invariant (region constant) memory
 *  input of a memory Phi. */
typedef struct phi_entry phi_entry;
struct phi_entry {
        ir_node   *phi;    /**< A phi with a region const memory. */
        int       pos;     /**< The position of the region const memory */
        ir_node   *load;   /**< the newly created load for this phi */
        phi_entry *next;   /**< link to the next entry */
};
1614
/**
 * An entry in the avail set: caches a Load already created for a given
 * (address, mode) pair so hoisted Loads can be reused instead of duplicated.
 */
typedef struct avail_entry_t {
        ir_node *ptr;   /**< the address pointer */
        ir_mode *mode;  /**< the load mode */
        ir_node *load;  /**< the associated Load */
} avail_entry_t;
1623
1624 /**
1625  * Compare two avail entries.
1626  */
1627 static int cmp_avail_entry(const void *elt, const void *key, size_t size)
1628 {
1629         const avail_entry_t *a = (const avail_entry_t*)elt;
1630         const avail_entry_t *b = (const avail_entry_t*)key;
1631         (void) size;
1632
1633         return a->ptr != b->ptr || a->mode != b->mode;
1634 }  /* cmp_avail_entry */
1635
1636 /**
1637  * Calculate the hash value of an avail entry.
1638  */
1639 static unsigned hash_cache_entry(const avail_entry_t *entry)
1640 {
1641         return get_irn_idx(entry->ptr) * 9 + hash_ptr(entry->mode);
1642 }  /* hash_cache_entry */
1643
/**
 * Move Loads out of loops if possible.
 *
 * For every Load in the SCC that reads a global address and is not
 * aliased by any Store in the SCC, create (or reuse) a copy of the Load
 * in the loop's predecessor block and replace the loop-internal Load by
 * its result.  The avail set caches already-hoisted Loads per
 * (address, mode) pair.
 *
 * @param pscc   the loop described by an SCC
 * @param env    the loop environment
 */
static void move_loads_out_of_loops(scc *pscc, loop_env *env)
{
        ir_node   *phi, *load, *next, *other, *next_other;
        int       j;
        phi_entry *phi_list = NULL;
        set       *avail;

        /* collect all outer memories: for every memory Phi in the SCC,
         * record the inputs that come from outside the SCC */
        for (phi = pscc->head; phi != NULL; phi = next) {
                node_entry *ne = get_irn_ne(phi, env);
                next = ne->next;

                /* check all memory Phi's */
                if (! is_Phi(phi))
                        continue;

                assert(get_irn_mode(phi) == mode_M && "DFS return non-memory Phi");

                for (j = get_irn_arity(phi) - 1; j >= 0; --j) {
                        ir_node    *pred = get_irn_n(phi, j);
                        node_entry *pe   = get_irn_ne(pred, env);

                        if (pe->pscc != ne->pscc) {
                                /* not in the same SCC, is region const
                                 * (note: this pe deliberately shadows the node_entry above) */
                                phi_entry *pe = OALLOC(&env->obst, phi_entry);

                                pe->phi  = phi;
                                pe->pos  = j;
                                pe->next = phi_list;
                                phi_list = pe;
                        }
                }
        }
        /* no Phis no fun */
        assert(phi_list != NULL && "DFS found a loop without Phi");

        /* for now, we cannot handle more than one input (only reducible cf) */
        if (phi_list->next != NULL)
                return;

        avail = new_set(cmp_avail_entry, 8);

        for (load = pscc->head; load; load = next) {
                ir_mode *load_mode;
                node_entry *ne = get_irn_ne(load, env);
                next = ne->next;

                if (is_Load(load)) {
                        ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
                        ir_node     *ptr = get_Load_ptr(load);

                        /* for now, we cannot handle Loads with exceptions */
                        if (info->projs[pn_Load_res] == NULL || info->projs[pn_Load_X_regular] != NULL || info->projs[pn_Load_X_except] != NULL)
                                continue;

                        /* for now, we can only move Load(Global) */
                        if (! is_SymConst_addr_ent(ptr))
                                continue;
                        load_mode = get_Load_mode(load);
                        /* scan the SCC for a Store that may alias this Load */
                        for (other = pscc->head; other != NULL; other = next_other) {
                                node_entry *ne = get_irn_ne(other, env);
                                next_other = ne->next;

                                if (is_Store(other)) {
                                        ir_alias_relation rel = get_alias_relation(
                                                get_Store_ptr(other),
                                                get_irn_mode(get_Store_value(other)),
                                                ptr, load_mode);
                                        /* if there might be an alias, we cannot pass this Store */
                                        if (rel != ir_no_alias)
                                                break;
                                }
                                /* only Phis and pure Calls are allowed here, so ignore them */
                        }
                        if (other == NULL) {
                                ldst_info_t *ninfo = NULL;
                                phi_entry   *pe;
                                dbg_info    *db;

                                /* yep, no aliasing Store found, Load can be moved */
                                DB((dbg, LEVEL_1, "  Found a Load that could be moved: %+F\n", load));

                                db   = get_irn_dbg_info(load);
                                /* phi_list has exactly one entry here (checked above) */
                                for (pe = phi_list; pe != NULL; pe = pe->next) {
                                        int     pos   = pe->pos;
                                        ir_node *phi  = pe->phi;
                                        ir_node *blk  = get_nodes_block(phi);
                                        ir_node *pred = get_Block_cfgpred_block(blk, pos);
                                        ir_node *irn, *mem;
                                        avail_entry_t entry, *res;

                                        /* reuse an already hoisted Load for the same (ptr, mode) */
                                        entry.ptr  = ptr;
                                        entry.mode = load_mode;
                                        res = set_find(avail_entry_t, avail, &entry, sizeof(entry), hash_cache_entry(&entry));
                                        if (res != NULL) {
                                                irn = res->load;
                                        } else {
                                                irn = new_rd_Load(db, pred, get_Phi_pred(phi, pos), ptr, load_mode, cons_none);
                                                entry.load = irn;
                                                set_insert(avail_entry_t, avail, &entry, sizeof(entry), hash_cache_entry(&entry));
                                                DB((dbg, LEVEL_1, "  Created %+F in %+F\n", irn, pred));
                                        }
                                        pe->load = irn;
                                        ninfo = get_ldst_info(irn, &env->obst);

                                        ninfo->projs[pn_Load_M] = mem = new_r_Proj(irn, mode_M, pn_Load_M);
                                        if (res == NULL) {
                                                /* only a newly created Load is wired into the Phi;
                                                 * a cached Load must not set the Phi pred again, as
                                                 * there might be other Loads between phi and irn already
                                                 */
                                                set_Phi_pred(phi, pos, mem);
                                        }

                                        ninfo->projs[pn_Load_res] = new_r_Proj(irn, load_mode, pn_Load_res);
                                }

                                /* now kill the old Load */
                                exchange(info->projs[pn_Load_M], get_Load_mem(load));
                                exchange(info->projs[pn_Load_res], ninfo->projs[pn_Load_res]);

                                env->changes |= DF_CHANGED;
                        }
                }
        }
        del_set(avail);
}  /* move_loads_out_of_loops */
1776
/**
 * Process a loop SCC: if it is a pure memory loop consisting only of
 * Phi, Load, Store and pure Call nodes, either collapse a useless Phi
 * cycle or try to move Loads out of the loop.
 *
 * @param pscc  the SCC
 * @param env   the loop environment
 */
static void process_loop(scc *pscc, loop_env *env)
{
        ir_node *irn, *next, *header = NULL;
        node_entry *b, *h = NULL;
        int j, only_phi, num_outside, process = 0;
        ir_node *out_rc;

        /* find the header block for this scc: the member block with the
         * highest POnum is taken (presumably the loop header in the block
         * post-order numbering computed elsewhere -- TODO confirm, POnum
         * assignment is not visible in this function) */
        for (irn = pscc->head; irn; irn = next) {
                node_entry *e = get_irn_ne(irn, env);
                ir_node *block = get_nodes_block(irn);

                next = e->next;
                b = get_irn_ne(block, env);

                if (header != NULL) {
                        if (h->POnum < b->POnum) {
                                header = block;
                                h      = b;
                        }
                } else {
                        header = block;
                        h      = b;
                }
        }

        /* check if this scc contains only Phi, Loads or Stores nodes */
        only_phi    = 1;    /* stays 1 while nothing but Phis were seen */
        num_outside = 0;    /* number of distinct region constants feeding the loop */
        out_rc      = NULL; /* the first region constant seen */
        for (irn = pscc->head; irn; irn = next) {
                node_entry *e = get_irn_ne(irn, env);

                next = e->next;
                switch (get_irn_opcode(irn)) {
                case iro_Call:
                        if (is_Call_pure(irn)) {
                                /* pure calls can be treated like loads */
                                only_phi = 0;
                                break;
                        }
                        /* non-pure calls must be handled like may-alias Stores */
                        goto fail;
                case iro_CopyB:
                        /* cannot handle CopyB yet */
                        goto fail;
                case iro_Load:
                        process = 1;
                        if (get_Load_volatility(irn) == volatility_is_volatile) {
                                /* cannot handle loops with volatile Loads */
                                goto fail;
                        }
                        only_phi = 0;
                        break;
                case iro_Store:
                        if (get_Store_volatility(irn) == volatility_is_volatile) {
                                /* cannot handle loops with volatile Stores */
                                goto fail;
                        }
                        only_phi = 0;
                        break;
                default:
                        only_phi = 0;
                        break;
                case iro_Phi:
                        /* every input coming from outside the SCC must be a region constant */
                        for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
                                ir_node *pred  = get_irn_n(irn, j);
                                node_entry *pe = get_irn_ne(pred, env);

                                if (pe->pscc != e->pscc) {
                                        /* not in the same SCC, must be a region const */
                                        if (! is_rc(pred, header)) {
                                                /* not a memory loop */
                                                goto fail;
                                        }
                                        if (out_rc == NULL) {
                                                /* first region constant */
                                                out_rc = pred;
                                                ++num_outside;
                                        } else if (out_rc != pred) {
                                                /* another region constant */
                                                ++num_outside;
                                        }
                                }
                        }
                        break;
                }
        }
        /* loops without any Load are not interesting here */
        if (! process)
                goto fail;

        /* found a memory loop */
        DB((dbg, LEVEL_2, "  Found a memory loop:\n  "));
        if (only_phi && num_outside == 1) {
                /* a phi cycle with only one real predecessor can be collapsed */
                DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));

                for (irn = pscc->head; irn; irn = next) {
                        node_entry *e = get_irn_ne(irn, env);
                        next = e->next;
                        exchange(irn, out_rc);
                }
                env->changes |= DF_CHANGED;
                return;
        }

#ifdef DEBUG_libfirm
        for (irn = pscc->head; irn; irn = next) {
                node_entry *e = get_irn_ne(irn, env);
                next = e->next;
                DB((dbg, LEVEL_2, " %+F,", irn));
        }
        DB((dbg, LEVEL_2, "\n"));
#endif
        move_loads_out_of_loops(pscc, env);

fail:
        ;
}  /* process_loop */
1902
1903 /**
1904  * Process a SCC.
1905  *
1906  * @param pscc  the SCC
1907  * @param env   the loop environment
1908  */
1909 static void process_scc(scc *pscc, loop_env *env)
1910 {
1911         ir_node *head = pscc->head;
1912         node_entry *e = get_irn_ne(head, env);
1913
1914 #ifdef DEBUG_libfirm
1915         {
1916                 ir_node *irn, *next;
1917
1918                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
1919                 for (irn = pscc->head; irn; irn = next) {
1920                         node_entry *e = get_irn_ne(irn, env);
1921
1922                         next = e->next;
1923
1924                         DB((dbg, LEVEL_4, " %+F,", irn));
1925                 }
1926                 DB((dbg, LEVEL_4, "\n"));
1927         }
1928 #endif
1929
1930         if (e->next != NULL) {
1931                 /* this SCC has more than one member */
1932                 process_loop(pscc, env);
1933         }
1934 }  /* process_scc */
1935
1936 /**
1937  * Do Tarjan's SCC algorithm and drive load/store optimization.
1938  *
1939  * @param irn  start at this node
1940  * @param env  the loop environment
1941  */
1942 static void dfs(ir_node *irn, loop_env *env)
1943 {
1944         int i, n;
1945         node_entry *node = get_irn_ne(irn, env);
1946
1947         mark_irn_visited(irn);
1948
1949         node->DFSnum = env->nextDFSnum++;
1950         node->low    = node->DFSnum;
1951         push(env, irn);
1952
1953         /* handle preds */
1954         if (is_Phi(irn) || is_Sync(irn)) {
1955                 n = get_irn_arity(irn);
1956                 for (i = 0; i < n; ++i) {
1957                         ir_node *pred = get_irn_n(irn, i);
1958                         node_entry *o = get_irn_ne(pred, env);
1959
1960                         if (!irn_visited(pred)) {
1961                                 dfs(pred, env);
1962                                 node->low = MIN(node->low, o->low);
1963                         }
1964                         if (o->DFSnum < node->DFSnum && o->in_stack)
1965                                 node->low = MIN(o->DFSnum, node->low);
1966                 }
1967         } else if (is_fragile_op(irn)) {
1968                 ir_node *pred = get_memop_mem(irn);
1969                 node_entry *o = get_irn_ne(pred, env);
1970
1971                 if (!irn_visited(pred)) {
1972                         dfs(pred, env);
1973                         node->low = MIN(node->low, o->low);
1974                 }
1975                 if (o->DFSnum < node->DFSnum && o->in_stack)
1976                         node->low = MIN(o->DFSnum, node->low);
1977         } else if (is_Proj(irn)) {
1978                 ir_node *pred = get_Proj_pred(irn);
1979                 node_entry *o = get_irn_ne(pred, env);
1980
1981                 if (!irn_visited(pred)) {
1982                         dfs(pred, env);
1983                         node->low = MIN(node->low, o->low);
1984                 }
1985                 if (o->DFSnum < node->DFSnum && o->in_stack)
1986                         node->low = MIN(o->DFSnum, node->low);
1987         }
1988         else {
1989                  /* IGNORE predecessors */
1990         }
1991
1992         if (node->low == node->DFSnum) {
1993                 scc *pscc = OALLOC(&env->obst, scc);
1994                 ir_node *x;
1995
1996                 pscc->head = NULL;
1997                 do {
1998                         node_entry *e;
1999
2000                         x = pop(env);
2001                         e = get_irn_ne(x, env);
2002                         e->pscc    = pscc;
2003                         e->next    = pscc->head;
2004                         pscc->head = x;
2005                 } while (x != irn);
2006
2007                 process_scc(pscc, env);
2008         }
2009 }  /* dfs */
2010
2011 /**
2012  * Do the DFS on the memory edges a graph.
2013  *
2014  * @param irg  the graph to process
2015  * @param env  the loop environment
2016  */
2017 static void do_dfs(ir_graph *irg, loop_env *env)
2018 {
2019         ir_node  *endblk, *end;
2020         int      i;
2021
2022         inc_irg_visited(irg);
2023
2024         /* visit all memory nodes */
2025         endblk = get_irg_end_block(irg);
2026         for (i = get_Block_n_cfgpreds(endblk) - 1; i >= 0; --i) {
2027                 ir_node *pred = get_Block_cfgpred(endblk, i);
2028
2029                 pred = skip_Proj(pred);
2030                 if (is_Return(pred)) {
2031                         dfs(get_Return_mem(pred), env);
2032                 } else if (is_Raise(pred)) {
2033                         dfs(get_Raise_mem(pred), env);
2034                 } else if (is_fragile_op(pred)) {
2035                         dfs(get_memop_mem(pred), env);
2036                 } else if (is_Bad(pred)) {
2037                         /* ignore non-optimized block predecessor */
2038                 } else {
2039                         assert(0 && "Unknown EndBlock predecessor");
2040                 }
2041         }
2042
2043         /* visit the keep-alives */
2044         end = get_irg_end(irg);
2045         for (i = get_End_n_keepalives(end) - 1; i >= 0; --i) {
2046                 ir_node *ka = get_End_keepalive(end, i);
2047
2048                 if (is_Phi(ka) && !irn_visited(ka))
2049                         dfs(ka, env);
2050         }
2051 }  /* do_dfs */
2052
2053 /**
2054  * Optimize Loads/Stores in loops.
2055  *
2056  * @param irg  the graph
2057  */
2058 static int optimize_loops(ir_graph *irg)
2059 {
2060         loop_env env;
2061
2062         env.stack         = NEW_ARR_F(ir_node *, 128);
2063         env.tos           = 0;
2064         env.nextDFSnum    = 0;
2065         env.POnum         = 0;
2066         env.changes       = 0;
2067         ir_nodehashmap_init(&env.map);
2068         obstack_init(&env.obst);
2069
2070         /* calculate the SCC's and drive loop optimization. */
2071         do_dfs(irg, &env);
2072
2073         DEL_ARR_F(env.stack);
2074         obstack_free(&env.obst, NULL);
2075         ir_nodehashmap_destroy(&env.map);
2076
2077         return env.changes;
2078 }  /* optimize_loops */
2079
2080 /*
2081  * do the load store optimization
2082  */
2083 void optimize_load_store(ir_graph *irg)
2084 {
2085         walk_env_t env;
2086
2087         assure_irg_properties(irg,
2088                 IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE
2089                 | IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES
2090                 | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
2091                 | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
2092                 | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
2093
2094         FIRM_DBG_REGISTER(dbg, "firm.opt.ldstopt");
2095
2096         assert(get_irg_phase_state(irg) != phase_building);
2097         assert(get_irg_pinned(irg) != op_pin_state_floats &&
2098                 "LoadStore optimization needs pinned graph");
2099
2100         if (get_opt_alias_analysis()) {
2101                 assure_irp_globals_entity_usage_computed();
2102         }
2103
2104         obstack_init(&env.obst);
2105         env.changes = 0;
2106
2107         /* init the links, then collect Loads/Stores/Proj's in lists */
2108         master_visited = 0;
2109         irg_walk_graph(irg, firm_clear_link, collect_nodes, &env);
2110
2111         /* now we have collected enough information, optimize */
2112         irg_walk_graph(irg, NULL, do_load_store_optimize, &env);
2113
2114         env.changes |= optimize_loops(irg);
2115
2116         obstack_free(&env.obst, NULL);
2117
2118         confirm_irg_properties(irg,
2119                 env.changes
2120                 ? env.changes & CF_CHANGED
2121                         ? IR_GRAPH_PROPERTIES_NONE
2122                         : IR_GRAPH_PROPERTIES_CONTROL_FLOW
2123                 : IR_GRAPH_PROPERTIES_ALL);
2124 }
2125
2126 ir_graph_pass_t *optimize_load_store_pass(const char *name)
2127 {
2128         return def_graph_pass(name ? name : "ldst", optimize_load_store);
2129 }  /* optimize_load_store_pass */