07561222c961b2aa66a60d3d56b67fdbfc94ec9f
[libfirm] / ir / opt / ldstopt.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Load/Store optimizations.
23  * @author  Michael Beck
24  */
25 #include "config.h"
26
27 #include <string.h>
28
29 #include "iroptimize.h"
30 #include "irnode_t.h"
31 #include "irgraph_t.h"
32 #include "irmode_t.h"
33 #include "iropt_t.h"
34 #include "ircons_t.h"
35 #include "irgmod.h"
36 #include "irgwalk.h"
37 #include "irtools.h"
38 #include "tv_t.h"
39 #include "dbginfo_t.h"
40 #include "iropt_dbg.h"
41 #include "irflag_t.h"
42 #include "array_t.h"
43 #include "irhooks.h"
44 #include "iredges.h"
45 #include "irpass.h"
46 #include "irmemory.h"
47 #include "irnodehashmap.h"
48 #include "irgopt.h"
49 #include "set.h"
50 #include "be.h"
51 #include "debug.h"
52 #include "opt_manage.h"
53
54 /** The debug handle. */
55 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
56
57 #undef IMAX
58 #define IMAX(a,b)   ((a) > (b) ? (a) : (b))
59
60 #define MAX_PROJ    IMAX(IMAX((long)pn_Load_max, (long)pn_Store_max), (long)pn_Call_max)
61
62 enum changes_t {
63         DF_CHANGED = 1,       /**< data flow changed */
64         CF_CHANGED = 2,       /**< control flow changed */
65 };
66
67 /**
68  * walker environment
69  */
70 typedef struct walk_env_t {
71         struct obstack obst;          /**< list of all stores */
72         unsigned changes;             /**< a bitmask of graph changes */
73 } walk_env_t;
74
75 /** A Load/Store info. */
76 typedef struct ldst_info_t {
77         ir_node  *projs[MAX_PROJ+1];  /**< list of Proj's of this node */
78         ir_node  *exc_block;          /**< the exception block if available */
79         int      exc_idx;             /**< predecessor index in the exception block */
80         unsigned visited;             /**< visited counter for breaking loops */
81 } ldst_info_t;
82
83 /**
84  * flags for control flow.
85  */
86 enum block_flags_t {
87         BLOCK_HAS_COND = 1,      /**< Block has conditional control flow */
88         BLOCK_HAS_EXC  = 2       /**< Block has exceptional control flow */
89 };
90
91 /**
92  * a Block info.
93  */
94 typedef struct block_info_t {
95         unsigned flags;               /**< flags for the block */
96 } block_info_t;
97
98 /** the master visited flag for loop detection. */
99 static unsigned master_visited = 0;
100
101 #define INC_MASTER()       ++master_visited
102 #define MARK_NODE(info)    (info)->visited = master_visited
103 #define NODE_VISITED(info) (info)->visited >= master_visited
104
105 /**
106  * get the Load/Store info of a node
107  */
108 static ldst_info_t *get_ldst_info(ir_node *node, struct obstack *obst)
109 {
110         ldst_info_t *info = (ldst_info_t*)get_irn_link(node);
111
112         if (! info) {
113                 info = OALLOCZ(obst, ldst_info_t);
114                 set_irn_link(node, info);
115         }
116         return info;
117 }  /* get_ldst_info */
118
119 /**
120  * get the Block info of a node
121  */
122 static block_info_t *get_block_info(ir_node *node, struct obstack *obst)
123 {
124         block_info_t *info = (block_info_t*)get_irn_link(node);
125
126         if (! info) {
127                 info = OALLOCZ(obst, block_info_t);
128                 set_irn_link(node, info);
129         }
130         return info;
131 }  /* get_block_info */
132
133 /**
134  * update the projection info for a Load/Store
135  */
136 static unsigned update_projs(ldst_info_t *info, ir_node *proj)
137 {
138         long nr = get_Proj_proj(proj);
139
140         assert(0 <= nr && nr <= MAX_PROJ && "Wrong proj from LoadStore");
141
142         if (info->projs[nr]) {
143                 /* there is already one, do CSE */
144                 exchange(proj, info->projs[nr]);
145                 return DF_CHANGED;
146         }
147         else {
148                 info->projs[nr] = proj;
149                 return 0;
150         }
151 }  /* update_projs */
152
153 /**
154  * update the exception block info for a Load/Store node.
155  *
156  * @param info   the load/store info struct
157  * @param block  the exception handler block for this load/store
158  * @param pos    the control flow input of the block
159  */
160 static unsigned update_exc(ldst_info_t *info, ir_node *block, int pos)
161 {
162         assert(info->exc_block == NULL && "more than one exception block found");
163
164         info->exc_block = block;
165         info->exc_idx   = pos;
166         return 0;
167 }  /* update_exc */
168
169 /** Return the number of uses of an address node */
170 #define get_irn_n_uses(adr)     get_irn_n_edges(adr)
171
172 /**
173  * walker, collects all Load/Store/Proj nodes
174  *
175  * walks from Start -> End
176  */
177 static void collect_nodes(ir_node *node, void *env)
178 {
179         walk_env_t  *wenv   = (walk_env_t *)env;
180         unsigned     opcode = get_irn_opcode(node);
181         ir_node     *pred, *blk, *pred_blk;
182         ldst_info_t *ldst_info;
183
184         if (opcode == iro_Proj) {
185                 pred   = get_Proj_pred(node);
186                 opcode = get_irn_opcode(pred);
187
188                 if (opcode == iro_Load || opcode == iro_Store || opcode == iro_Call) {
189                         ldst_info = get_ldst_info(pred, &wenv->obst);
190
191                         wenv->changes |= update_projs(ldst_info, node);
192
193                         /*
194                          * Place the Proj's to the same block as the
195                          * predecessor Load. This is always ok and prevents
196                          * "non-SSA" form after optimizations if the Proj
197                          * is in a wrong block.
198                          */
199                         blk      = get_nodes_block(node);
200                         pred_blk = get_nodes_block(pred);
201                         if (blk != pred_blk) {
202                                 wenv->changes |= DF_CHANGED;
203                                 set_nodes_block(node, pred_blk);
204                         }
205                 }
206         } else if (opcode == iro_Block) {
207                 int i;
208
209                 for (i = get_Block_n_cfgpreds(node) - 1; i >= 0; --i) {
210                         ir_node      *pred_block, *proj;
211                         block_info_t *bl_info;
212                         int          is_exc = 0;
213
214                         pred = proj = get_Block_cfgpred(node, i);
215
216                         if (is_Proj(proj)) {
217                                 pred   = get_Proj_pred(proj);
218                                 is_exc = is_x_except_Proj(proj);
219                         }
220
221                         /* ignore Bad predecessors, they will be removed later */
222                         if (is_Bad(pred))
223                                 continue;
224
225                         pred_block = get_nodes_block(pred);
226                         bl_info    = get_block_info(pred_block, &wenv->obst);
227
228                         if (is_fragile_op(pred) && is_exc)
229                                 bl_info->flags |= BLOCK_HAS_EXC;
230                         else if (is_irn_forking(pred))
231                                 bl_info->flags |= BLOCK_HAS_COND;
232
233                         opcode = get_irn_opcode(pred);
234                         if (is_exc && (opcode == iro_Load || opcode == iro_Store || opcode == iro_Call)) {
235                                 ldst_info = get_ldst_info(pred, &wenv->obst);
236
237                                 wenv->changes |= update_exc(ldst_info, node, i);
238                         }
239                 }
240         }
241 }  /* collect_nodes */
242
243 /**
244  * Returns an entity if the address ptr points to a constant one.
245  *
246  * @param ptr  the address
247  *
248  * @return an entity or NULL
249  */
250 static ir_entity *find_constant_entity(ir_node *ptr)
251 {
252         for (;;) {
253                 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
254                         return get_SymConst_entity(ptr);
255                 } else if (is_Sel(ptr)) {
256                         ir_entity *ent = get_Sel_entity(ptr);
257                         ir_type   *tp  = get_entity_owner(ent);
258
259                         /* Do not fiddle with polymorphism. */
260                         if (is_Class_type(get_entity_owner(ent)) &&
261                                 ((get_entity_n_overwrites(ent)    != 0) ||
262                                 (get_entity_n_overwrittenby(ent) != 0)   ) )
263                                 return NULL;
264
265                         if (is_Array_type(tp)) {
266                                 /* check bounds */
267                                 int i, n;
268
269                                 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
270                                         ir_node   *bound;
271                                         ir_tarval *tlower, *tupper;
272                                         ir_node   *index = get_Sel_index(ptr, i);
273                                         ir_tarval *tv    = computed_value(index);
274
275                                         /* check if the index is constant */
276                                         if (tv == tarval_bad)
277                                                 return NULL;
278
279                                         bound  = get_array_lower_bound(tp, i);
280                                         tlower = computed_value(bound);
281                                         bound  = get_array_upper_bound(tp, i);
282                                         tupper = computed_value(bound);
283
284                                         if (tlower == tarval_bad || tupper == tarval_bad)
285                                                 return NULL;
286
287                                         if (tarval_cmp(tv, tlower) == ir_relation_less)
288                                                 return NULL;
289                                         if (tarval_cmp(tupper, tv) == ir_relation_less)
290                                                 return NULL;
291
292                                         /* ok, bounds check finished */
293                                 }
294                         }
295
296                         if (get_entity_linkage(ent) & IR_LINKAGE_CONSTANT)
297                                 return ent;
298
299                         /* try next */
300                         ptr = get_Sel_ptr(ptr);
301                 } else if (is_Add(ptr)) {
302                         ir_node *l = get_Add_left(ptr);
303                         ir_node *r = get_Add_right(ptr);
304
305                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
306                                 ptr = l;
307                         else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
308                                 ptr = r;
309                         else
310                                 return NULL;
311
312                         /* for now, we support only one addition, reassoc should fold all others */
313                         if (! is_SymConst(ptr) && !is_Sel(ptr))
314                                 return NULL;
315                 } else if (is_Sub(ptr)) {
316                         ir_node *l = get_Sub_left(ptr);
317                         ir_node *r = get_Sub_right(ptr);
318
319                         if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
320                                 ptr = l;
321                         else
322                                 return NULL;
323                         /* for now, we support only one substraction, reassoc should fold all others */
324                         if (! is_SymConst(ptr) && !is_Sel(ptr))
325                                 return NULL;
326                 } else
327                         return NULL;
328         }
329 }  /* find_constant_entity */
330
331 /**
332  * Return the Selection index of a Sel node from dimension n
333  */
334 static long get_Sel_array_index_long(ir_node *n, int dim)
335 {
336         ir_node *index = get_Sel_index(n, dim);
337         assert(is_Const(index));
338         return get_tarval_long(get_Const_tarval(index));
339 }  /* get_Sel_array_index_long */
340
/** One step of an access path through a compound initializer,
 *  built while walking a Sel/Add chain back towards its root. */
typedef struct path_entry {
	ir_entity         *ent;   /**< the entity selected at this step */
	struct path_entry *next;  /**< next (deeper) step of the path */
	size_t            index;  /**< index into the compound initializer */
} path_entry;
346
/**
 * Recursively resolve the constant initializer value addressed by ptr.
 *
 * Walks the address expression towards its root SymConst, accumulating
 * an access path (entity + index per step) in 'next'.  At the root the
 * path is replayed against the entity's initializer to extract the
 * addressed constant value.  Add/Sub of constants are translated into
 * array index steps by dividing the byte offset by the element size.
 *
 * @param ptr   the address expression (SymConst/Sel/Add/Sub)
 * @param next  access path collected so far (NULL at top level)
 *
 * @return the initializer value node or NULL if it cannot be resolved
 */
static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next)
{
	path_entry       entry, *p;
	ir_entity        *ent, *field;
	ir_initializer_t *initializer;
	ir_tarval        *tv;
	ir_type          *tp;
	size_t           n;

	entry.next = next;
	if (is_SymConst(ptr)) {
		/* found the root */
		ent         = get_SymConst_entity(ptr);
		initializer = get_entity_initializer(ent);
		/* replay the collected access path against the initializer */
		for (p = next; p != NULL;) {
			if (initializer->kind != IR_INITIALIZER_COMPOUND)
				return NULL;
			n  = get_initializer_compound_n_entries(initializer);
			tp = get_entity_type(ent);

			if (is_Array_type(tp)) {
				ent = get_array_element_entity(tp);
				if (ent != p->ent) {
					/* a missing [0] */
					if (0 >= n)
						return NULL;
					initializer = get_initializer_compound_value(initializer, 0);
					continue;
				}
			}
			if (p->index >= n)
				return NULL;
			initializer = get_initializer_compound_value(initializer, p->index);

			ent = p->ent;
			p   = p->next;
		}
		/* descend implicit [0] dimensions left over after the path */
		tp = get_entity_type(ent);
		while (is_Array_type(tp)) {
			ent = get_array_element_entity(tp);
			tp = get_entity_type(ent);
			/* a missing [0] */
			n  = get_initializer_compound_n_entries(initializer);
			if (0 >= n)
				return NULL;
			initializer = get_initializer_compound_value(initializer, 0);
		}

		switch (initializer->kind) {
		case IR_INITIALIZER_CONST:
			return get_initializer_const_value(initializer);
		case IR_INITIALIZER_TARVAL:
		case IR_INITIALIZER_NULL:
		default:
			return NULL;
		}
	} else if (is_Sel(ptr)) {
		/* one more path step: determine the index of the selected field */
		entry.ent = field = get_Sel_entity(ptr);
		tp = get_entity_owner(field);
		if (is_Array_type(tp)) {
			assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
			entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
		} else {
			/* struct/union: linear search for the member position */
			size_t i, n_members = get_compound_n_members(tp);
			for (i = 0; i < n_members; ++i) {
				if (get_compound_member(tp, i) == field)
					break;
			}
			if (i >= n_members) {
				/* not found: should NOT happen */
				return NULL;
			}
			entry.index = i;
		}
		return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
	}  else if (is_Add(ptr)) {
		ir_mode  *mode;
		unsigned pos;

		{
			/* one operand must be the constant byte offset */
			ir_node *l = get_Add_left(ptr);
			ir_node *r = get_Add_right(ptr);
			if (is_Const(r)) {
				ptr = l;
				tv  = get_Const_tarval(r);
			} else {
				ptr = r;
				tv  = get_Const_tarval(l);
			}
		}
ptr_arith:
		mode = get_tarval_mode(tv);

		/* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
		if (is_Sel(ptr)) {
			field = get_Sel_entity(ptr);
		} else {
			field = get_SymConst_entity(ptr);
		}

		/* count needed entries: one per array dimension below field */
		pos = 0;
		for (ent = field;;) {
			tp = get_entity_type(ent);
			if (! is_Array_type(tp))
				break;
			ent = get_array_element_entity(tp);
			++pos;
		}
		/* should be at least ONE entry */
		if (pos == 0)
			return NULL;

		/* allocate the right number of entries */
		NEW_ARR_A(path_entry, p, pos);

		/* fill them up: convert the byte offset into per-dimension
		 * indices via div/mod by the element size */
		pos = 0;
		for (ent = field;;) {
			unsigned   size;
			ir_tarval *sz, *tv_index, *tlower, *tupper;
			long       index;
			ir_node   *bound;

			tp = get_entity_type(ent);
			if (! is_Array_type(tp))
				break;
			ent = get_array_element_entity(tp);
			p[pos].ent  = ent;
			p[pos].next = &p[pos + 1];

			size = get_type_size_bytes(get_entity_type(ent));
			sz   = new_tarval_from_long(size, mode);

			tv_index = tarval_div(tv, sz);
			tv       = tarval_mod(tv, sz);

			if (tv_index == tarval_bad || tv == tarval_bad)
				return NULL;

			assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
			bound  = get_array_lower_bound(tp, 0);
			tlower = computed_value(bound);
			bound  = get_array_upper_bound(tp, 0);
			tupper = computed_value(bound);

			if (tlower == tarval_bad || tupper == tarval_bad)
				return NULL;

			if (tarval_cmp(tv_index, tlower) == ir_relation_less)
				return NULL;
			if (tarval_cmp(tupper, tv_index) == ir_relation_less)
				return NULL;

			/* ok, bounds check finished */
			index = get_tarval_long(tv_index);
			p[pos].index = index;
			++pos;
		}
		if (! tarval_is_null(tv)) {
			/* hmm, wrong access: offset does not hit an element start */
			return NULL;
		}
		/* chain the new entries in front of the existing path */
		p[pos - 1].next = next;
		return rec_find_compound_ent_value(ptr, p);
	} else if (is_Sub(ptr)) {
		ir_node *l = get_Sub_left(ptr);
		ir_node *r = get_Sub_right(ptr);

		/* handle ptr - const as ptr + (-const); NOTE(review): assumes r
		 * is a Const (checked earlier in find_constant_entity) — confirm
		 * all callers establish this */
		ptr = l;
		tv  = get_Const_tarval(r);
		tv  = tarval_neg(tv);
		goto ptr_arith;
	}
	return NULL;
}
523
/**
 * Return the constant initializer value addressed by ptr, or NULL if
 * it cannot be determined.  Starts the recursive walk with an empty
 * access path.
 */
static ir_node *find_compound_ent_value(ir_node *ptr)
{
	return rec_find_compound_ent_value(ptr, NULL);
}
528
529 /* forward */
530 static void reduce_adr_usage(ir_node *ptr);
531
532 /**
533  * Update a Load that may have lost its users.
534  */
535 static void handle_load_update(ir_node *load)
536 {
537         ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
538
539         /* do NOT touch volatile loads for now */
540         if (get_Load_volatility(load) == volatility_is_volatile)
541                 return;
542
543         if (! info->projs[pn_Load_res] && ! info->projs[pn_Load_X_except]) {
544                 ir_node *ptr = get_Load_ptr(load);
545                 ir_node *mem = get_Load_mem(load);
546
547                 /* a Load whose value is neither used nor exception checked, remove it */
548                 exchange(info->projs[pn_Load_M], mem);
549                 if (info->projs[pn_Load_X_regular])
550                         exchange(info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
551                 kill_node(load);
552                 reduce_adr_usage(ptr);
553         }
554 }  /* handle_load_update */
555
556 /**
557  * A use of an address node has vanished. Check if this was a Proj
558  * node and update the counters.
559  */
560 static void reduce_adr_usage(ir_node *ptr)
561 {
562         ir_node *pred;
563         if (!is_Proj(ptr))
564                 return;
565         if (get_irn_n_edges(ptr) > 0)
566                 return;
567
568         /* this Proj is dead now */
569         pred = get_Proj_pred(ptr);
570         if (is_Load(pred)) {
571                 ldst_info_t *info = (ldst_info_t*)get_irn_link(pred);
572                 info->projs[get_Proj_proj(ptr)] = NULL;
573
574                 /* this node lost its result proj, handle that */
575                 handle_load_update(pred);
576         }
577 }  /* reduce_adr_usage */
578
579 /**
580  * Check, if an already existing value of mode old_mode can be converted
581  * into the needed one new_mode without loss.
582  */
583 static int can_use_stored_value(ir_mode *old_mode, ir_mode *new_mode)
584 {
585         unsigned old_size;
586         unsigned new_size;
587         if (old_mode == new_mode)
588                 return true;
589
590         old_size = get_mode_size_bits(old_mode);
591         new_size = get_mode_size_bits(new_mode);
592
593         /* if both modes are two-complement ones, we can always convert the
594            Stored value into the needed one. (on big endian machines we currently
595            only support this for modes of same size) */
596         if (old_size >= new_size &&
597                   get_mode_arithmetic(old_mode) == irma_twos_complement &&
598                   get_mode_arithmetic(new_mode) == irma_twos_complement &&
599                   (!be_get_backend_param()->byte_order_big_endian
600                 || old_size == new_size)) {
601                 return true;
602         }
603         return false;
604 }
605
606 /**
607  * Check whether a Call is at least pure, i.e. does only read memory.
608  */
609 static unsigned is_Call_pure(ir_node *call)
610 {
611         ir_type *call_tp = get_Call_type(call);
612         unsigned prop = get_method_additional_properties(call_tp);
613
614         /* check first the call type */
615         if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
616                 /* try the called entity */
617                 ir_node *ptr = get_Call_ptr(call);
618
619                 if (is_SymConst_addr_ent(ptr)) {
620                         ir_entity *ent = get_SymConst_entity(ptr);
621
622                         prop = get_entity_additional_properties(ent);
623                 }
624         }
625         return (prop & (mtp_property_const|mtp_property_pure)) != 0;
626 }  /* is_Call_pure */
627
/**
 * Split an address expression into a base pointer and a constant byte
 * offset.
 *
 * Walks down Add/Sub-of-Const and fixed-layout Sel chains, summing up
 * the byte offsets; stops at the first node it cannot decompose.
 *
 * @param ptr      the address expression
 * @param pOffset  out: accumulated byte offset relative to the base
 *
 * @return the base pointer node
 */
static ir_node *get_base_and_offset(ir_node *ptr, long *pOffset)
{
	ir_mode *mode  = get_irn_mode(ptr);
	long    offset = 0;

	/* TODO: long might not be enough, we should probably use some tarval thingy... */
	for (;;) {
		if (is_Add(ptr)) {
			ir_node *l = get_Add_left(ptr);
			ir_node *r = get_Add_right(ptr);

			if (get_irn_mode(l) != mode || !is_Const(r))
				break;

			offset += get_tarval_long(get_Const_tarval(r));
			ptr     = l;
		} else if (is_Sub(ptr)) {
			ir_node *l = get_Sub_left(ptr);
			ir_node *r = get_Sub_right(ptr);

			if (get_irn_mode(l) != mode || !is_Const(r))
				break;

			offset -= get_tarval_long(get_Const_tarval(r));
			ptr     = l;
		} else if (is_Sel(ptr)) {
			ir_entity *ent = get_Sel_entity(ptr);
			ir_type   *tp  = get_entity_owner(ent);

			if (is_Array_type(tp)) {
				int     size;
				ir_node *index;

				/* only one dimensional arrays yet */
				if (get_Sel_n_indexs(ptr) != 1)
					break;
				index = get_Sel_index(ptr, 0);
				if (! is_Const(index))
					break;

				tp = get_entity_type(ent);
				if (get_type_state(tp) != layout_fixed)
					break;

				/* element size times constant index */
				size    = get_type_size_bytes(tp);
				offset += size * get_tarval_long(get_Const_tarval(index));
			} else {
				/* struct member: add its fixed byte offset */
				if (get_type_state(tp) != layout_fixed)
					break;
				offset += get_entity_offset(ent);
			}
			ptr = get_Sel_ptr(ptr);
		} else
			break;
	}

	*pOffset = offset;
	return ptr;
}
687
/**
 * Try to replace a Load by the value of an earlier Store to the same
 * (or an overlapping) location: read-after-write elimination.
 *
 * The Load may read a sub-range of the stored value; in that case the
 * stored value is shifted and converted to the Load's mode (currently
 * little-endian only).  On success all Projs of the Load are rewired
 * and the Load is killed.
 *
 * @param load           the Load node
 * @param load_base_ptr  base pointer of the Load address
 * @param load_offset    byte offset of the Load relative to the base
 * @param store          the earlier Store node
 *
 * @return 0 if nothing was done, otherwise DF_CHANGED possibly
 *         combined with CF_CHANGED
 */
static int try_load_after_store(ir_node *load,
		ir_node *load_base_ptr, long load_offset, ir_node *store)
{
	ldst_info_t *info;
	ir_node *store_ptr      = get_Store_ptr(store);
	long     store_offset;
	ir_node *store_base_ptr = get_base_and_offset(store_ptr, &store_offset);
	ir_node *store_value;
	ir_mode *store_mode;
	ir_node *load_ptr;
	ir_mode *load_mode;
	long     load_mode_len;
	long     store_mode_len;
	long     delta;
	int      res;

	/* both addresses must share the same base */
	if (load_base_ptr != store_base_ptr)
		return 0;

	load_mode      = get_Load_mode(load);
	load_mode_len  = get_mode_size_bytes(load_mode);
	store_mode     = get_irn_mode(get_Store_value(store));
	store_mode_len = get_mode_size_bytes(store_mode);
	delta          = load_offset - store_offset;
	store_value    = get_Store_value(store);

	if (delta != 0 || store_mode != load_mode) {
		/* TODO: implement for big-endian */
		/* the loaded range must lie completely inside the stored range */
		if (delta < 0 || delta + load_mode_len > store_mode_len
				|| (be_get_backend_param()->byte_order_big_endian
				    && load_mode_len != store_mode_len))
			return 0;

		if (get_mode_arithmetic(store_mode) != irma_twos_complement ||
			get_mode_arithmetic(load_mode)  != irma_twos_complement)
			return 0;


		/* produce a shift to adjust offset delta */
		if (delta > 0) {
			ir_node *cnst;
			ir_graph *irg = get_irn_irg(load);

			cnst        = new_r_Const_long(irg, mode_Iu, delta * 8);
			store_value = new_r_Shr(get_nodes_block(load),
									store_value, cnst, store_mode);
		}

		/* add an convert if needed */
		if (store_mode != load_mode) {
			store_value = new_r_Conv(get_nodes_block(load), store_value, load_mode);
		}
	}

	DBG_OPT_RAW(load, store_value);

	/* rewire the Load's Projs: memory, exceptions, result */
	info = (ldst_info_t*)get_irn_link(load);
	if (info->projs[pn_Load_M])
		exchange(info->projs[pn_Load_M], get_Load_mem(load));

	res = 0;
	/* no exception */
	if (info->projs[pn_Load_X_except]) {
		ir_graph *irg = get_irn_irg(load);
		exchange( info->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
		res |= CF_CHANGED;
	}
	if (info->projs[pn_Load_X_regular]) {
		exchange( info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
		res |= CF_CHANGED;
	}

	if (info->projs[pn_Load_res])
		exchange(info->projs[pn_Load_res], store_value);

	load_ptr = get_Load_ptr(load);
	kill_node(load);
	reduce_adr_usage(load_ptr);
	return res | DF_CHANGED;
}
768
769 /**
770  * Follow the memory chain as long as there are only Loads,
771  * alias free Stores, and constant Calls and try to replace the
772  * current Load by a previous ones.
773  * Note that in unreachable loops it might happen that we reach
774  * load again, as well as we can fall into a cycle.
775  * We break such cycles using a special visited flag.
776  *
777  * INC_MASTER() must be called before dive into
778  */
779 static unsigned follow_Mem_chain(ir_node *load, ir_node *curr)
780 {
781         unsigned    res = 0;
782         ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
783         ir_node     *pred;
784         ir_node     *ptr       = get_Load_ptr(load);
785         ir_node     *mem       = get_Load_mem(load);
786         ir_mode     *load_mode = get_Load_mode(load);
787
788         for (pred = curr; load != pred; ) {
789                 ldst_info_t *pred_info = (ldst_info_t*)get_irn_link(pred);
790
791                 /*
792                  * a Load immediately after a Store -- a read after write.
793                  * We may remove the Load, if both Load & Store does not have an
794                  * exception handler OR they are in the same Block. In the latter
795                  * case the Load cannot throw an exception when the previous Store was
796                  * quiet.
797                  *
798                  * Why we need to check for Store Exception? If the Store cannot
799                  * be executed (ROM) the exception handler might simply jump into
800                  * the load Block :-(
801                  * We could make it a little bit better if we would know that the
802                  * exception handler of the Store jumps directly to the end...
803                  */
804                 if (is_Store(pred) && ((pred_info->projs[pn_Store_X_except] == NULL
805                                 && info->projs[pn_Load_X_except] == NULL)
806                                 || get_nodes_block(load) == get_nodes_block(pred)))
807                 {
808                         long    load_offset;
809                         ir_node *base_ptr = get_base_and_offset(ptr, &load_offset);
810                         int     changes   = try_load_after_store(load, base_ptr, load_offset, pred);
811
812                         if (changes != 0)
813                                 return res | changes;
814                 } else if (is_Load(pred) && get_Load_ptr(pred) == ptr &&
815                            can_use_stored_value(get_Load_mode(pred), load_mode)) {
816                         /*
817                          * a Load after a Load -- a read after read.
818                          * We may remove the second Load, if it does not have an exception
819                          * handler OR they are in the same Block. In the later case
820                          * the Load cannot throw an exception when the previous Load was
821                          * quiet.
822                          *
823                          * Here, there is no need to check if the previous Load has an
824                          * exception hander because they would have exact the same
825                          * exception...
826                          *
827                          * TODO: implement load-after-load with different mode for big
828                          *       endian
829                          */
830                         if (info->projs[pn_Load_X_except] == NULL
831                                         || get_nodes_block(load) == get_nodes_block(pred)) {
832                                 ir_node *value;
833
834                                 DBG_OPT_RAR(load, pred);
835
836                                 /* the result is used */
837                                 if (info->projs[pn_Load_res]) {
838                                         if (pred_info->projs[pn_Load_res] == NULL) {
839                                                 /* create a new Proj again */
840                                                 pred_info->projs[pn_Load_res] = new_r_Proj(pred, get_Load_mode(pred), pn_Load_res);
841                                         }
842                                         value = pred_info->projs[pn_Load_res];
843
844                                         /* add an convert if needed */
845                                         if (get_Load_mode(pred) != load_mode) {
846                                                 value = new_r_Conv(get_nodes_block(load), value, load_mode);
847                                         }
848
849                                         exchange(info->projs[pn_Load_res], value);
850                                 }
851
852                                 if (info->projs[pn_Load_M])
853                                         exchange(info->projs[pn_Load_M], mem);
854
855                                 /* no exception */
856                                 if (info->projs[pn_Load_X_except]) {
857                                         ir_graph *irg = get_irn_irg(load);
858                                         exchange(info->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
859                                         res |= CF_CHANGED;
860                                 }
861                                 if (info->projs[pn_Load_X_regular]) {
862                                         exchange( info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
863                                         res |= CF_CHANGED;
864                                 }
865
866                                 kill_node(load);
867                                 reduce_adr_usage(ptr);
868                                 return res |= DF_CHANGED;
869                         }
870                 }
871
872                 if (is_Store(pred)) {
873                         /* check if we can pass through this store */
874                         ir_alias_relation rel = get_alias_relation(
875                                 get_Store_ptr(pred),
876                                 get_irn_mode(get_Store_value(pred)),
877                                 ptr, load_mode);
878                         /* if the might be an alias, we cannot pass this Store */
879                         if (rel != ir_no_alias)
880                                 break;
881                         pred = skip_Proj(get_Store_mem(pred));
882                 } else if (is_Load(pred)) {
883                         pred = skip_Proj(get_Load_mem(pred));
884                 } else if (is_Call(pred)) {
885                         if (is_Call_pure(pred)) {
886                                 /* The called graph is at least pure, so there are no Store's
887                                    in it. We can handle it like a Load and skip it. */
888                                 pred = skip_Proj(get_Call_mem(pred));
889                         } else {
890                                 /* there might be Store's in the graph, stop here */
891                                 break;
892                         }
893                 } else {
894                         /* follow only Load chains */
895                         break;
896                 }
897
898                 /* check for cycles */
899                 if (NODE_VISITED(pred_info))
900                         break;
901                 MARK_NODE(pred_info);
902         }
903
904         if (is_Sync(pred)) {
905                 int i;
906
907                 /* handle all Sync predecessors */
908                 for (i = get_Sync_n_preds(pred) - 1; i >= 0; --i) {
909                         res |= follow_Mem_chain(load, skip_Proj(get_Sync_pred(pred, i)));
910                         if (res)
911                                 return res;
912                 }
913         }
914
915         return res;
916 }  /* follow_Mem_chain */
917
918 /*
919  * Check if we can replace the load by a given const from
920  * the const code irg.
921  */
922 ir_node *can_replace_load_by_const(const ir_node *load, ir_node *c)
923 {
924         ir_mode  *c_mode = get_irn_mode(c);
925         ir_mode  *l_mode = get_Load_mode(load);
926         ir_node  *block  = get_nodes_block(load);
927         dbg_info *dbgi   = get_irn_dbg_info(load);
928         ir_node  *res    = copy_const_value(dbgi, c, block);
929
930         if (c_mode != l_mode) {
931                 /* check, if the mode matches OR can be easily converted info */
932                 if (is_reinterpret_cast(c_mode, l_mode)) {
933                         /* copy the value from the const code irg and cast it */
934                         res = new_rd_Conv(dbgi, block, res, l_mode);
935                 } else {
936                         return NULL;
937                 }
938         }
939         return res;
940 }
941
942 /**
943  * optimize a Load
944  *
945  * @param load  the Load node
946  */
947 static unsigned optimize_load(ir_node *load)
948 {
949         ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
950         ir_node     *mem, *ptr, *value;
951         ir_entity   *ent;
952         long        dummy;
953         unsigned    res = 0;
954
955         /* do NOT touch volatile loads for now */
956         if (get_Load_volatility(load) == volatility_is_volatile)
957                 return 0;
958
959         /* the address of the load to be optimized */
960         ptr = get_Load_ptr(load);
961
962         /* The mem of the Load. Must still be returned after optimization. */
963         mem = get_Load_mem(load);
964
965         if (info->projs[pn_Load_res] == NULL
966                         && info->projs[pn_Load_X_except] == NULL) {
967                 /* the value is never used and we don't care about exceptions, remove */
968                 exchange(info->projs[pn_Load_M], mem);
969
970                 if (info->projs[pn_Load_X_regular]) {
971                         /* should not happen, but if it does, remove it */
972                         exchange(info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
973                         res |= CF_CHANGED;
974                 }
975                 kill_node(load);
976                 reduce_adr_usage(ptr);
977                 return res | DF_CHANGED;
978         }
979
980         value = NULL;
981         /* check if we can determine the entity that will be loaded */
982         ent = find_constant_entity(ptr);
983         if (ent != NULL
984                         && get_entity_visibility(ent) != ir_visibility_external) {
985                 /* a static allocation that is not external: there should be NO
986                  * exception when loading even if we cannot replace the load itself.
987                  */
988
989                 /* no exception, clear the info field as it might be checked later again */
990                 if (info->projs[pn_Load_X_except]) {
991                         ir_graph *irg = get_irn_irg(load);
992                         exchange(info->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
993                         info->projs[pn_Load_X_except] = NULL;
994                         res |= CF_CHANGED;
995                 }
996                 if (info->projs[pn_Load_X_regular]) {
997                         exchange(info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
998                         info->projs[pn_Load_X_regular] = NULL;
999                         res |= CF_CHANGED;
1000                 }
1001
1002                 if (get_entity_linkage(ent) & IR_LINKAGE_CONSTANT) {
1003                         if (has_entity_initializer(ent)) {
1004                                 /* new style initializer */
1005                                 value = find_compound_ent_value(ptr);
1006                         }
1007                         if (value != NULL) {
1008                                 ir_graph *irg = get_irn_irg(load);
1009                                 value = can_replace_load_by_const(load, value);
1010                                 if (value != NULL && is_Sel(ptr)) {
1011                                         /* frontend has inserted masking operations after bitfield accesses,
1012                                          * so we might have to shift the const. */
1013                                         unsigned char bit_offset = get_entity_offset_bits_remainder(get_Sel_entity(ptr));
1014                                         ir_tarval *tv_old = get_Const_tarval(value);
1015                                         ir_tarval *tv_offset = new_tarval_from_long(bit_offset, mode_Bu);
1016                                         ir_tarval *tv_new = tarval_shl(tv_old, tv_offset);
1017                                         value = new_r_Const(irg, tv_new);
1018                                 }
1019                         }
1020                 }
1021         }
1022         if (value != NULL) {
1023                 /* we completely replace the load by this value */
1024                 if (info->projs[pn_Load_X_except]) {
1025                         ir_graph *irg = get_irn_irg(load);
1026                         exchange(info->projs[pn_Load_X_except], new_r_Bad(irg, mode_X));
1027                         info->projs[pn_Load_X_except] = NULL;
1028                         res |= CF_CHANGED;
1029                 }
1030                 if (info->projs[pn_Load_X_regular]) {
1031                         exchange(info->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
1032                         info->projs[pn_Load_X_regular] = NULL;
1033                         res |= CF_CHANGED;
1034                 }
1035                 if (info->projs[pn_Load_M]) {
1036                         exchange(info->projs[pn_Load_M], mem);
1037                         res |= DF_CHANGED;
1038                 }
1039                 if (info->projs[pn_Load_res]) {
1040                         exchange(info->projs[pn_Load_res], value);
1041                         res |= DF_CHANGED;
1042                 }
1043                 kill_node(load);
1044                 reduce_adr_usage(ptr);
1045                 return res;
1046         }
1047
1048         /* Check, if the address of this load is used more than once.
1049          * If not, more load cannot be removed in any case. */
1050         if (get_irn_n_uses(ptr) <= 1 && get_irn_n_uses(get_base_and_offset(ptr, &dummy)) <= 1)
1051                 return res;
1052
1053         /*
1054          * follow the memory chain as long as there are only Loads
1055          * and try to replace current Load or Store by a previous one.
1056          * Note that in unreachable loops it might happen that we reach
1057          * load again, as well as we can fall into a cycle.
1058          * We break such cycles using a special visited flag.
1059          */
1060         INC_MASTER();
1061         res = follow_Mem_chain(load, skip_Proj(mem));
1062         return res;
1063 }  /* optimize_load */
1064
1065 /**
1066  * Check whether a value of mode new_mode would completely overwrite a value
1067  * of mode old_mode in memory.
1068  */
1069 static int is_completely_overwritten(ir_mode *old_mode, ir_mode *new_mode)
1070 {
1071         return get_mode_size_bits(new_mode) >= get_mode_size_bits(old_mode);
1072 }  /* is_completely_overwritten */
1073
1074 /**
1075  * Check whether small is a part of large (starting at same address).
1076  */
1077 static int is_partially_same(ir_node *small, ir_node *large)
1078 {
1079         ir_mode *sm = get_irn_mode(small);
1080         ir_mode *lm = get_irn_mode(large);
1081
1082         /* FIXME: Check endianness */
1083         return is_Conv(small) && get_Conv_op(small) == large
1084             && get_mode_size_bytes(sm) < get_mode_size_bytes(lm)
1085             && get_mode_arithmetic(sm) == irma_twos_complement
1086             && get_mode_arithmetic(lm) == irma_twos_complement;
1087 }  /* is_partially_same */
1088
1089 /**
1090  * follow the memory chain as long as there are only Loads and alias free Stores.
1091  *
1092  * INC_MASTER() must be called before dive into
1093  */
1094 static unsigned follow_Mem_chain_for_Store(ir_node *store, ir_node *curr)
1095 {
1096         unsigned res = 0;
1097         ldst_info_t *info = (ldst_info_t*)get_irn_link(store);
1098         ir_node *pred;
1099         ir_node *ptr = get_Store_ptr(store);
1100         ir_node *mem = get_Store_mem(store);
1101         ir_node *value = get_Store_value(store);
1102         ir_mode *mode  = get_irn_mode(value);
1103         ir_node *block = get_nodes_block(store);
1104
1105         for (pred = curr; pred != store;) {
1106                 ldst_info_t *pred_info = (ldst_info_t*)get_irn_link(pred);
1107
1108                 /*
1109                  * BEWARE: one might think that checking the modes is useless, because
1110                  * if the pointers are identical, they refer to the same object.
1111                  * This is only true in strong typed languages, not is C were the following
1112                  * is possible *(ir_type1 *)p = a; *(ir_type2 *)p = b ...
1113                  * However, if the size of the mode that is written is bigger or equal the
1114                  * size of the old one, the old value is completely overwritten and can be
1115                  * killed ...
1116                  */
1117                 if (is_Store(pred) && get_Store_ptr(pred) == ptr &&
1118             get_nodes_block(pred) == block) {
1119                         /*
1120                          * a Store after a Store in the same Block -- a write after write.
1121                          */
1122
1123                         /*
1124                          * We may remove the first Store, if the old value is completely
1125                          * overwritten or the old value is a part of the new value,
1126                          * and if it does not have an exception handler.
1127                          *
1128                          * TODO: What, if both have the same exception handler ???
1129                          */
1130                         if (get_Store_volatility(pred) != volatility_is_volatile
1131                                 && !pred_info->projs[pn_Store_X_except]) {
1132                                 ir_node *predvalue = get_Store_value(pred);
1133                                 ir_mode *predmode  = get_irn_mode(predvalue);
1134
1135                                 if (is_completely_overwritten(predmode, mode)
1136                                         || is_partially_same(predvalue, value)) {
1137                                         DBG_OPT_WAW(pred, store);
1138                                         exchange(pred_info->projs[pn_Store_M], get_Store_mem(pred));
1139                                         kill_node(pred);
1140                                         reduce_adr_usage(ptr);
1141                                         return DF_CHANGED;
1142                                 }
1143                         }
1144
1145                         /*
1146                          * We may remove the Store, if the old value already contains
1147                          * the new value, and if it does not have an exception handler.
1148                          *
1149                          * TODO: What, if both have the same exception handler ???
1150                          */
1151                         if (get_Store_volatility(store) != volatility_is_volatile
1152                                 && !info->projs[pn_Store_X_except]) {
1153                                 ir_node *predvalue = get_Store_value(pred);
1154
1155                                 if (is_partially_same(value, predvalue)) {
1156                                         DBG_OPT_WAW(pred, store);
1157                                         exchange(info->projs[pn_Store_M], mem);
1158                                         kill_node(store);
1159                                         reduce_adr_usage(ptr);
1160                                         return DF_CHANGED;
1161                                 }
1162                         }
1163                 } else if (is_Load(pred) && get_Load_ptr(pred) == ptr &&
1164                            value == pred_info->projs[pn_Load_res]) {
1165                         /*
1166                          * a Store of a value just loaded from the same address
1167                          * -- a write after read.
1168                          * We may remove the Store, if it does not have an exception
1169                          * handler.
1170                          */
1171                         if (! info->projs[pn_Store_X_except]) {
1172                                 DBG_OPT_WAR(store, pred);
1173                                 exchange(info->projs[pn_Store_M], mem);
1174                                 kill_node(store);
1175                                 reduce_adr_usage(ptr);
1176                                 return DF_CHANGED;
1177                         }
1178                 }
1179
1180                 if (is_Store(pred)) {
1181                         /* check if we can pass through this store */
1182                         ir_alias_relation rel = get_alias_relation(
1183                                 get_Store_ptr(pred),
1184                                 get_irn_mode(get_Store_value(pred)),
1185                                 ptr, mode);
1186                         /* if the might be an alias, we cannot pass this Store */
1187                         if (rel != ir_no_alias)
1188                                 break;
1189                         pred = skip_Proj(get_Store_mem(pred));
1190                 } else if (is_Load(pred)) {
1191                         ir_alias_relation rel = get_alias_relation(
1192                                 get_Load_ptr(pred), get_Load_mode(pred),
1193                                 ptr, mode);
1194                         if (rel != ir_no_alias)
1195                                 break;
1196
1197                         pred = skip_Proj(get_Load_mem(pred));
1198                 } else {
1199                         /* follow only Load chains */
1200                         break;
1201                 }
1202
1203                 /* check for cycles */
1204                 if (NODE_VISITED(pred_info))
1205                         break;
1206                 MARK_NODE(pred_info);
1207         }
1208
1209         if (is_Sync(pred)) {
1210                 int i;
1211
1212                 /* handle all Sync predecessors */
1213                 for (i = get_Sync_n_preds(pred) - 1; i >= 0; --i) {
1214                         res |= follow_Mem_chain_for_Store(store, skip_Proj(get_Sync_pred(pred, i)));
1215                         if (res)
1216                                 break;
1217                 }
1218         }
1219         return res;
1220 }  /* follow_Mem_chain_for_Store */
1221
1222 /** find entity used as base for an address calculation */
1223 static ir_entity *find_entity(ir_node *ptr)
1224 {
1225         switch (get_irn_opcode(ptr)) {
1226         case iro_SymConst:
1227                 return get_SymConst_entity(ptr);
1228         case iro_Sel: {
1229                 ir_node *pred = get_Sel_ptr(ptr);
1230                 if (get_irg_frame(get_irn_irg(ptr)) == pred)
1231                         return get_Sel_entity(ptr);
1232
1233                 return find_entity(pred);
1234         }
1235         case iro_Sub:
1236         case iro_Add: {
1237                 ir_node *left = get_binop_left(ptr);
1238                 ir_node *right;
1239                 if (mode_is_reference(get_irn_mode(left)))
1240                         return find_entity(left);
1241                 right = get_binop_right(ptr);
1242                 if (mode_is_reference(get_irn_mode(right)))
1243                         return find_entity(right);
1244                 return NULL;
1245         }
1246         default:
1247                 return NULL;
1248         }
1249 }
1250
1251 /**
1252  * optimize a Store
1253  *
1254  * @param store  the Store node
1255  */
1256 static unsigned optimize_store(ir_node *store)
1257 {
1258         ir_node   *ptr;
1259         ir_node   *mem;
1260         ir_entity *entity;
1261
1262         if (get_Store_volatility(store) == volatility_is_volatile)
1263                 return 0;
1264
1265         ptr    = get_Store_ptr(store);
1266         entity = find_entity(ptr);
1267
1268         /* a store to an entity which is never read is unnecessary */
1269         if (entity != NULL && !(get_entity_usage(entity) & ir_usage_read)) {
1270                 ldst_info_t *info = (ldst_info_t*)get_irn_link(store);
1271                 if (info->projs[pn_Store_X_except] == NULL) {
1272                         DB((dbg, LEVEL_1, "  Killing useless %+F to never read entity %+F\n", store, entity));
1273                         exchange(info->projs[pn_Store_M], get_Store_mem(store));
1274                         kill_node(store);
1275                         reduce_adr_usage(ptr);
1276                         return DF_CHANGED;
1277                 }
1278         }
1279
1280         /* Check, if the address of this Store is used more than once.
1281          * If not, this Store cannot be removed in any case. */
1282         if (get_irn_n_uses(ptr) <= 1)
1283                 return 0;
1284
1285         mem = get_Store_mem(store);
1286
1287         /* follow the memory chain as long as there are only Loads */
1288         INC_MASTER();
1289
1290         return follow_Mem_chain_for_Store(store, skip_Proj(mem));
1291 }  /* optimize_store */
1292
1293 /**
1294  * walker, optimizes Phi after Stores to identical places:
1295  * Does the following optimization:
1296  * @verbatim
1297  *
1298  *   val1   val2   val3          val1  val2  val3
1299  *    |      |      |               \    |    /
1300  *  Store  Store  Store              \   |   /
1301  *      \    |    /                   PhiData
1302  *       \   |   /                       |
1303  *        \  |  /                      Store
1304  *          PhiM
1305  *
1306  * @endverbatim
1307  * This reduces the number of stores and allows for predicated execution.
1308  * Moves Stores back to the end of a function which may be bad.
1309  *
1310  * This is only possible if the predecessor blocks have only one successor.
1311  */
1312 static unsigned optimize_phi(ir_node *phi, walk_env_t *wenv)
1313 {
1314         int i, n;
1315         ir_node *store, *ptr, *block, *phi_block, *phiM, *phiD, *exc, *projM;
1316 #ifdef DO_CACHEOPT
1317         ir_node *old_store;
1318 #endif
1319         ir_mode *mode;
1320         ir_node **inM, **inD, **projMs;
1321         int *idx;
1322         dbg_info *db = NULL;
1323         ldst_info_t *info;
1324         block_info_t *bl_info;
1325         unsigned res = 0;
1326
1327         /* Must be a memory Phi */
1328         if (get_irn_mode(phi) != mode_M)
1329                 return 0;
1330
1331         n = get_Phi_n_preds(phi);
1332         if (n <= 0)
1333                 return 0;
1334
1335         /* must be only one user */
1336         projM = get_Phi_pred(phi, 0);
1337         if (get_irn_n_edges(projM) != 1)
1338                 return 0;
1339
1340         store = skip_Proj(projM);
1341 #ifdef DO_CACHEOPT
1342         old_store = store;
1343 #endif
1344         if (!is_Store(store))
1345                 return 0;
1346
1347         block = get_nodes_block(store);
1348
1349         /* check if the block is post dominated by Phi-block
1350            and has no exception exit */
1351         bl_info = (block_info_t*)get_irn_link(block);
1352         if (bl_info->flags & BLOCK_HAS_EXC)
1353                 return 0;
1354
1355         phi_block = get_nodes_block(phi);
1356         if (! block_strictly_postdominates(phi_block, block))
1357                 return 0;
1358
1359         /* this is the address of the store */
1360         ptr  = get_Store_ptr(store);
1361         mode = get_irn_mode(get_Store_value(store));
1362         info = (ldst_info_t*)get_irn_link(store);
1363         exc  = info->exc_block;
1364
1365         for (i = 1; i < n; ++i) {
1366                 ir_node *pred = get_Phi_pred(phi, i);
1367
1368                 if (get_irn_n_edges(pred) != 1)
1369                         return 0;
1370
1371                 pred = skip_Proj(pred);
1372                 if (!is_Store(pred))
1373                         return 0;
1374
1375                 if (ptr != get_Store_ptr(pred) || mode != get_irn_mode(get_Store_value(pred)))
1376                         return 0;
1377
1378                 info = (ldst_info_t*)get_irn_link(pred);
1379
1380                 /* check, if all stores have the same exception flow */
1381                 if (exc != info->exc_block)
1382                         return 0;
1383
1384                 block = get_nodes_block(pred);
1385
1386                 /* check if the block is post dominated by Phi-block
1387                    and has no exception exit. Note that block must be different from
1388                    Phi-block, else we would move a Store from end End of a block to its
1389                    Start... */
1390                 bl_info = (block_info_t*)get_irn_link(block);
1391                 if (bl_info->flags & BLOCK_HAS_EXC)
1392                         return 0;
1393                 if (block == phi_block || ! block_postdominates(phi_block, block))
1394                         return 0;
1395         }
1396
1397         /*
1398          * ok, when we are here, we found all predecessors of a Phi that
1399          * are Stores to the same address and size. That means whatever
1400          * we do before we enter the block of the Phi, we do a Store.
1401          * So, we can move the Store to the current block:
1402          *
1403          *   val1    val2    val3          val1  val2  val3
1404          *    |       |       |               \    |    /
1405          * | Str | | Str | | Str |             \   |   /
1406          *      \     |     /                   PhiData
1407          *       \    |    /                       |
1408          *        \   |   /                       Str
1409          *           PhiM
1410          *
1411          * Is only allowed if the predecessor blocks have only one successor.
1412          */
1413
1414         NEW_ARR_A(ir_node *, projMs, n);
1415         NEW_ARR_A(ir_node *, inM, n);
1416         NEW_ARR_A(ir_node *, inD, n);
1417         NEW_ARR_A(int, idx, n);
1418
1419         /* Prepare: Collect all Store nodes.  We must do this
1420            first because we otherwise may loose a store when exchanging its
1421            memory Proj.
1422          */
1423         for (i = n - 1; i >= 0; --i) {
1424                 ir_node *store;
1425
1426                 projMs[i] = get_Phi_pred(phi, i);
1427                 assert(is_Proj(projMs[i]));
1428
1429                 store = get_Proj_pred(projMs[i]);
1430                 info  = (ldst_info_t*)get_irn_link(store);
1431
1432                 inM[i] = get_Store_mem(store);
1433                 inD[i] = get_Store_value(store);
1434                 idx[i] = info->exc_idx;
1435         }
1436         block = get_nodes_block(phi);
1437
1438         /* second step: create a new memory Phi */
1439         phiM = new_rd_Phi(get_irn_dbg_info(phi), block, n, inM, mode_M);
1440
1441         /* third step: create a new data Phi */
1442         phiD = new_rd_Phi(get_irn_dbg_info(phi), block, n, inD, mode);
1443
1444         /* rewire memory and kill the node */
1445         for (i = n - 1; i >= 0; --i) {
1446                 ir_node *proj  = projMs[i];
1447
1448                 if (is_Proj(proj)) {
1449                         ir_node *store = get_Proj_pred(proj);
1450                         exchange(proj, inM[i]);
1451                         kill_node(store);
1452                 }
1453         }
1454
1455         /* fourth step: create the Store */
1456         store = new_rd_Store(db, block, phiM, ptr, phiD, cons_none);
1457 #ifdef DO_CACHEOPT
1458         co_set_irn_name(store, co_get_irn_ident(old_store));
1459 #endif
1460
1461         projM = new_rd_Proj(NULL, store, mode_M, pn_Store_M);
1462
1463         info = get_ldst_info(store, &wenv->obst);
1464         info->projs[pn_Store_M] = projM;
1465
1466         /* fifths step: repair exception flow */
1467         if (exc) {
1468                 ir_node *projX = new_rd_Proj(NULL, store, mode_X, pn_Store_X_except);
1469
1470                 info->projs[pn_Store_X_except] = projX;
1471                 info->exc_block                = exc;
1472                 info->exc_idx                  = idx[0];
1473
1474                 for (i = 0; i < n; ++i) {
1475                         set_Block_cfgpred(exc, idx[i], projX);
1476                 }
1477
1478                 if (n > 1) {
1479                         /* the exception block should be optimized as some inputs are identical now */
1480                 }
1481
1482                 res |= CF_CHANGED;
1483         }
1484
1485         /* sixth step: replace old Phi */
1486         exchange(phi, projM);
1487
1488         return res | DF_CHANGED;
1489 }  /* optimize_phi */
1490
1491 /**
1492  * walker, do the optimizations
1493  */
1494 static void do_load_store_optimize(ir_node *n, void *env)
1495 {
1496         walk_env_t *wenv = (walk_env_t*)env;
1497
1498         switch (get_irn_opcode(n)) {
1499
1500         case iro_Load:
1501                 wenv->changes |= optimize_load(n);
1502                 break;
1503
1504         case iro_Store:
1505                 wenv->changes |= optimize_store(n);
1506                 break;
1507
1508         case iro_Phi:
1509                 wenv->changes |= optimize_phi(n, wenv);
1510                 break;
1511
1512         default:
1513                 break;
1514         }
1515 }  /* do_load_store_optimize */
1516
/** A strongly connected component (SCC) of memory nodes, represented as a
 *  singly linked list threaded through the members' node_entry::next links. */
typedef struct scc {
	ir_node *head;      /**< the head of the list */
} scc;
1521
/** Per-node bookkeeping for Tarjan's SCC algorithm. */
typedef struct node_entry {
	unsigned DFSnum;    /**< the DFS number of this node */
	unsigned low;       /**< the low number of this node */
	int      in_stack;  /**< flag, set if the node is on the stack */
	ir_node  *next;     /**< link to the next node in the same scc */
	scc      *pscc;     /**< the scc of this node */
	unsigned POnum;     /**< the post order number for blocks */
} node_entry;
1531
/** Environment for the loop (SCC-based) load/store optimization. */
typedef struct loop_env {
	ir_nodehashmap_t map;          /**< maps each ir_node to its node_entry */
	struct obstack   obst;         /**< obstack holding all node/phi/scc entries */
	ir_node          **stack;      /**< the node stack */
	size_t           tos;          /**< tos index */
	unsigned         nextDFSnum;   /**< the current DFS number */
	unsigned         POnum;        /**< current post order number */

	unsigned         changes;      /**< a bitmask of graph changes */
} loop_env;
1543
1544 /**
1545 * Gets the node_entry of a node
1546 */
1547 static node_entry *get_irn_ne(ir_node *irn, loop_env *env)
1548 {
1549         node_entry *e = (node_entry*)ir_nodehashmap_get(&env->map, irn);
1550
1551         if (e == NULL) {
1552                 e = OALLOC(&env->obst, node_entry);
1553                 memset(e, 0, sizeof(*e));
1554                 ir_nodehashmap_insert(&env->map, irn, e);
1555         }
1556         return e;
1557 }  /* get_irn_ne */
1558
1559 /**
1560  * Push a node onto the stack.
1561  *
1562  * @param env   the loop environment
1563  * @param n     the node to push
1564  */
1565 static void push(loop_env *env, ir_node *n)
1566 {
1567         node_entry *e;
1568
1569         if (env->tos == ARR_LEN(env->stack)) {
1570                 size_t nlen = ARR_LEN(env->stack) * 2;
1571                 ARR_RESIZE(ir_node *, env->stack, nlen);
1572         }
1573         env->stack[env->tos++] = n;
1574         e = get_irn_ne(n, env);
1575         e->in_stack = 1;
1576 }  /* push */
1577
1578 /**
1579  * pop a node from the stack
1580  *
1581  * @param env   the loop environment
1582  *
1583  * @return  The topmost node
1584  */
1585 static ir_node *pop(loop_env *env)
1586 {
1587         ir_node *n = env->stack[--env->tos];
1588         node_entry *e = get_irn_ne(n, env);
1589
1590         e->in_stack = 0;
1591         return n;
1592 }  /* pop */
1593
1594 /**
1595  * Check if irn is a region constant.
1596  * The block or irn must strictly dominate the header block.
1597  *
1598  * @param irn           the node to check
1599  * @param header_block  the header block of the induction variable
1600  */
1601 static int is_rc(ir_node *irn, ir_node *header_block)
1602 {
1603         ir_node *block = get_nodes_block(irn);
1604
1605         return (block != header_block) && block_dominates(block, header_block);
1606 }  /* is_rc */
1607
/* List entry describing one Phi input that comes from outside the loop. */
typedef struct phi_entry phi_entry;
struct phi_entry {
	ir_node   *phi;    /**< A phi with a region const memory. */
	int       pos;     /**< The position of the region const memory */
	ir_node   *load;   /**< the newly created load for this phi */
	phi_entry *next;   /**< link to the next list entry */
};
1615
1616 /**
1617  * An entry in the avail set.
1618  */
1619 typedef struct avail_entry_t {
1620         ir_node *ptr;   /**< the address pointer */
1621         ir_mode *mode;  /**< the load mode */
1622         ir_node *load;  /**< the associated Load */
1623 } avail_entry_t;
1624
1625 /**
1626  * Compare two avail entries.
1627  */
1628 static int cmp_avail_entry(const void *elt, const void *key, size_t size)
1629 {
1630         const avail_entry_t *a = (const avail_entry_t*)elt;
1631         const avail_entry_t *b = (const avail_entry_t*)key;
1632         (void) size;
1633
1634         return a->ptr != b->ptr || a->mode != b->mode;
1635 }  /* cmp_avail_entry */
1636
1637 /**
1638  * Calculate the hash value of an avail entry.
1639  */
1640 static unsigned hash_cache_entry(const avail_entry_t *entry)
1641 {
1642         return get_irn_idx(entry->ptr) * 9 + hash_ptr(entry->mode);
1643 }  /* hash_cache_entry */
1644
1645 /**
1646  * Move loops out of loops if possible.
1647  *
1648  * @param pscc   the loop described by an SCC
1649  * @param env    the loop environment
1650  */
1651 static void move_loads_out_of_loops(scc *pscc, loop_env *env)
1652 {
1653         ir_node   *phi, *load, *next, *other, *next_other;
1654         int       j;
1655         phi_entry *phi_list = NULL;
1656         set       *avail;
1657
1658         /* collect all outer memories */
1659         for (phi = pscc->head; phi != NULL; phi = next) {
1660                 node_entry *ne = get_irn_ne(phi, env);
1661                 next = ne->next;
1662
1663                 /* check all memory Phi's */
1664                 if (! is_Phi(phi))
1665                         continue;
1666
1667                 assert(get_irn_mode(phi) == mode_M && "DFS return non-memory Phi");
1668
1669                 for (j = get_irn_arity(phi) - 1; j >= 0; --j) {
1670                         ir_node    *pred = get_irn_n(phi, j);
1671                         node_entry *pe   = get_irn_ne(pred, env);
1672
1673                         if (pe->pscc != ne->pscc) {
1674                                 /* not in the same SCC, is region const */
1675                                 phi_entry *pe = OALLOC(&env->obst, phi_entry);
1676
1677                                 pe->phi  = phi;
1678                                 pe->pos  = j;
1679                                 pe->next = phi_list;
1680                                 phi_list = pe;
1681                         }
1682                 }
1683         }
1684         /* no Phis no fun */
1685         assert(phi_list != NULL && "DFS found a loop without Phi");
1686
1687         /* for now, we cannot handle more than one input (only reducible cf) */
1688         if (phi_list->next != NULL)
1689                 return;
1690
1691         avail = new_set(cmp_avail_entry, 8);
1692
1693         for (load = pscc->head; load; load = next) {
1694                 ir_mode *load_mode;
1695                 node_entry *ne = get_irn_ne(load, env);
1696                 next = ne->next;
1697
1698                 if (is_Load(load)) {
1699                         ldst_info_t *info = (ldst_info_t*)get_irn_link(load);
1700                         ir_node     *ptr = get_Load_ptr(load);
1701
1702                         /* for now, we cannot handle Loads with exceptions */
1703                         if (info->projs[pn_Load_res] == NULL || info->projs[pn_Load_X_regular] != NULL || info->projs[pn_Load_X_except] != NULL)
1704                                 continue;
1705
1706                         /* for now, we can only move Load(Global) */
1707                         if (! is_SymConst_addr_ent(ptr))
1708                                 continue;
1709                         load_mode = get_Load_mode(load);
1710                         for (other = pscc->head; other != NULL; other = next_other) {
1711                                 node_entry *ne = get_irn_ne(other, env);
1712                                 next_other = ne->next;
1713
1714                                 if (is_Store(other)) {
1715                                         ir_alias_relation rel = get_alias_relation(
1716                                                 get_Store_ptr(other),
1717                                                 get_irn_mode(get_Store_value(other)),
1718                                                 ptr, load_mode);
1719                                         /* if the might be an alias, we cannot pass this Store */
1720                                         if (rel != ir_no_alias)
1721                                                 break;
1722                                 }
1723                                 /* only Phis and pure Calls are allowed here, so ignore them */
1724                         }
1725                         if (other == NULL) {
1726                                 ldst_info_t *ninfo = NULL;
1727                                 phi_entry   *pe;
1728                                 dbg_info    *db;
1729
1730                                 /* yep, no aliasing Store found, Load can be moved */
1731                                 DB((dbg, LEVEL_1, "  Found a Load that could be moved: %+F\n", load));
1732
1733                                 db   = get_irn_dbg_info(load);
1734                                 for (pe = phi_list; pe != NULL; pe = pe->next) {
1735                                         int     pos   = pe->pos;
1736                                         ir_node *phi  = pe->phi;
1737                                         ir_node *blk  = get_nodes_block(phi);
1738                                         ir_node *pred = get_Block_cfgpred_block(blk, pos);
1739                                         ir_node *irn, *mem;
1740                                         avail_entry_t entry, *res;
1741
1742                                         entry.ptr  = ptr;
1743                                         entry.mode = load_mode;
1744                                         res = (avail_entry_t*)set_find(avail, &entry, sizeof(entry), hash_cache_entry(&entry));
1745                                         if (res != NULL) {
1746                                                 irn = res->load;
1747                                         } else {
1748                                                 irn = new_rd_Load(db, pred, get_Phi_pred(phi, pos), ptr, load_mode, cons_none);
1749                                                 entry.load = irn;
1750                                                 set_insert(avail, &entry, sizeof(entry), hash_cache_entry(&entry));
1751                                                 DB((dbg, LEVEL_1, "  Created %+F in %+F\n", irn, pred));
1752                                         }
1753                                         pe->load = irn;
1754                                         ninfo = get_ldst_info(irn, &env->obst);
1755
1756                                         ninfo->projs[pn_Load_M] = mem = new_r_Proj(irn, mode_M, pn_Load_M);
1757                                         if (res == NULL) {
1758                                                 /* irn is from cache, so do not set phi pred again.
1759                                                  * There might be other Loads between phi and irn already.
1760                                                  */
1761                                                 set_Phi_pred(phi, pos, mem);
1762                                         }
1763
1764                                         ninfo->projs[pn_Load_res] = new_r_Proj(irn, load_mode, pn_Load_res);
1765                                 }
1766
1767                                 /* now kill the old Load */
1768                                 exchange(info->projs[pn_Load_M], get_Load_mem(load));
1769                                 exchange(info->projs[pn_Load_res], ninfo->projs[pn_Load_res]);
1770
1771                                 env->changes |= DF_CHANGED;
1772                         }
1773                 }
1774         }
1775         del_set(avail);
1776 }  /* move_loads_out_of_loops */
1777
1778 /**
1779  * Process a loop SCC.
1780  *
1781  * @param pscc  the SCC
1782  * @param env   the loop environment
1783  */
1784 static void process_loop(scc *pscc, loop_env *env)
1785 {
1786         ir_node *irn, *next, *header = NULL;
1787         node_entry *b, *h = NULL;
1788         int j, only_phi, num_outside, process = 0;
1789         ir_node *out_rc;
1790
1791         /* find the header block for this scc */
1792         for (irn = pscc->head; irn; irn = next) {
1793                 node_entry *e = get_irn_ne(irn, env);
1794                 ir_node *block = get_nodes_block(irn);
1795
1796                 next = e->next;
1797                 b = get_irn_ne(block, env);
1798
1799                 if (header != NULL) {
1800                         if (h->POnum < b->POnum) {
1801                                 header = block;
1802                                 h      = b;
1803                         }
1804                 } else {
1805                         header = block;
1806                         h      = b;
1807                 }
1808         }
1809
1810         /* check if this scc contains only Phi, Loads or Stores nodes */
1811         only_phi    = 1;
1812         num_outside = 0;
1813         out_rc      = NULL;
1814         for (irn = pscc->head; irn; irn = next) {
1815                 node_entry *e = get_irn_ne(irn, env);
1816
1817                 next = e->next;
1818                 switch (get_irn_opcode(irn)) {
1819                 case iro_Call:
1820                         if (is_Call_pure(irn)) {
1821                                 /* pure calls can be treated like loads */
1822                                 only_phi = 0;
1823                                 break;
1824                         }
1825                         /* non-pure calls must be handle like may-alias Stores */
1826                         goto fail;
1827                 case iro_CopyB:
1828                         /* cannot handle CopyB yet */
1829                         goto fail;
1830                 case iro_Load:
1831                         process = 1;
1832                         if (get_Load_volatility(irn) == volatility_is_volatile) {
1833                                 /* cannot handle loops with volatile Loads */
1834                                 goto fail;
1835                         }
1836                         only_phi = 0;
1837                         break;
1838                 case iro_Store:
1839                         if (get_Store_volatility(irn) == volatility_is_volatile) {
1840                                 /* cannot handle loops with volatile Stores */
1841                                 goto fail;
1842                         }
1843                         only_phi = 0;
1844                         break;
1845                 default:
1846                         only_phi = 0;
1847                         break;
1848                 case iro_Phi:
1849                         for (j = get_irn_arity(irn) - 1; j >= 0; --j) {
1850                                 ir_node *pred  = get_irn_n(irn, j);
1851                                 node_entry *pe = get_irn_ne(pred, env);
1852
1853                                 if (pe->pscc != e->pscc) {
1854                                         /* not in the same SCC, must be a region const */
1855                                         if (! is_rc(pred, header)) {
1856                                                 /* not a memory loop */
1857                                                 goto fail;
1858                                         }
1859                                         if (out_rc == NULL) {
1860                                                 /* first region constant */
1861                                                 out_rc = pred;
1862                                                 ++num_outside;
1863                                         } else if (out_rc != pred) {
1864                                                 /* another region constant */
1865                                                 ++num_outside;
1866                                         }
1867                                 }
1868                         }
1869                         break;
1870                 }
1871         }
1872         if (! process)
1873                 goto fail;
1874
1875         /* found a memory loop */
1876         DB((dbg, LEVEL_2, "  Found a memory loop:\n  "));
1877         if (only_phi && num_outside == 1) {
1878                 /* a phi cycle with only one real predecessor can be collapsed */
1879                 DB((dbg, LEVEL_2, "  Found an USELESS Phi cycle:\n  "));
1880
1881                 for (irn = pscc->head; irn; irn = next) {
1882                         node_entry *e = get_irn_ne(irn, env);
1883                         next = e->next;
1884                         exchange(irn, out_rc);
1885                 }
1886                 env->changes |= DF_CHANGED;
1887                 return;
1888         }
1889
1890 #ifdef DEBUG_libfirm
1891         for (irn = pscc->head; irn; irn = next) {
1892                 node_entry *e = get_irn_ne(irn, env);
1893                 next = e->next;
1894                 DB((dbg, LEVEL_2, " %+F,", irn));
1895         }
1896         DB((dbg, LEVEL_2, "\n"));
1897 #endif
1898         move_loads_out_of_loops(pscc, env);
1899
1900 fail:
1901         ;
1902 }  /* process_loop */
1903
1904 /**
1905  * Process a SCC.
1906  *
1907  * @param pscc  the SCC
1908  * @param env   the loop environment
1909  */
1910 static void process_scc(scc *pscc, loop_env *env)
1911 {
1912         ir_node *head = pscc->head;
1913         node_entry *e = get_irn_ne(head, env);
1914
1915 #ifdef DEBUG_libfirm
1916         {
1917                 ir_node *irn, *next;
1918
1919                 DB((dbg, LEVEL_4, " SCC at %p:\n ", pscc));
1920                 for (irn = pscc->head; irn; irn = next) {
1921                         node_entry *e = get_irn_ne(irn, env);
1922
1923                         next = e->next;
1924
1925                         DB((dbg, LEVEL_4, " %+F,", irn));
1926                 }
1927                 DB((dbg, LEVEL_4, "\n"));
1928         }
1929 #endif
1930
1931         if (e->next != NULL) {
1932                 /* this SCC has more than one member */
1933                 process_loop(pscc, env);
1934         }
1935 }  /* process_scc */
1936
1937 /**
1938  * Do Tarjan's SCC algorithm and drive load/store optimization.
1939  *
1940  * @param irn  start at this node
1941  * @param env  the loop environment
1942  */
1943 static void dfs(ir_node *irn, loop_env *env)
1944 {
1945         int i, n;
1946         node_entry *node = get_irn_ne(irn, env);
1947
1948         mark_irn_visited(irn);
1949
1950         node->DFSnum = env->nextDFSnum++;
1951         node->low    = node->DFSnum;
1952         push(env, irn);
1953
1954         /* handle preds */
1955         if (is_Phi(irn) || is_Sync(irn)) {
1956                 n = get_irn_arity(irn);
1957                 for (i = 0; i < n; ++i) {
1958                         ir_node *pred = get_irn_n(irn, i);
1959                         node_entry *o = get_irn_ne(pred, env);
1960
1961                         if (!irn_visited(pred)) {
1962                                 dfs(pred, env);
1963                                 node->low = MIN(node->low, o->low);
1964                         }
1965                         if (o->DFSnum < node->DFSnum && o->in_stack)
1966                                 node->low = MIN(o->DFSnum, node->low);
1967                 }
1968         } else if (is_fragile_op(irn)) {
1969                 ir_node *pred = get_memop_mem(irn);
1970                 node_entry *o = get_irn_ne(pred, env);
1971
1972                 if (!irn_visited(pred)) {
1973                         dfs(pred, env);
1974                         node->low = MIN(node->low, o->low);
1975                 }
1976                 if (o->DFSnum < node->DFSnum && o->in_stack)
1977                         node->low = MIN(o->DFSnum, node->low);
1978         } else if (is_Proj(irn)) {
1979                 ir_node *pred = get_Proj_pred(irn);
1980                 node_entry *o = get_irn_ne(pred, env);
1981
1982                 if (!irn_visited(pred)) {
1983                         dfs(pred, env);
1984                         node->low = MIN(node->low, o->low);
1985                 }
1986                 if (o->DFSnum < node->DFSnum && o->in_stack)
1987                         node->low = MIN(o->DFSnum, node->low);
1988         }
1989         else {
1990                  /* IGNORE predecessors */
1991         }
1992
1993         if (node->low == node->DFSnum) {
1994                 scc *pscc = OALLOC(&env->obst, scc);
1995                 ir_node *x;
1996
1997                 pscc->head = NULL;
1998                 do {
1999                         node_entry *e;
2000
2001                         x = pop(env);
2002                         e = get_irn_ne(x, env);
2003                         e->pscc    = pscc;
2004                         e->next    = pscc->head;
2005                         pscc->head = x;
2006                 } while (x != irn);
2007
2008                 process_scc(pscc, env);
2009         }
2010 }  /* dfs */
2011
2012 /**
2013  * Do the DFS on the memory edges a graph.
2014  *
2015  * @param irg  the graph to process
2016  * @param env  the loop environment
2017  */
2018 static void do_dfs(ir_graph *irg, loop_env *env)
2019 {
2020         ir_node  *endblk, *end;
2021         int      i;
2022
2023         inc_irg_visited(irg);
2024
2025         /* visit all memory nodes */
2026         endblk = get_irg_end_block(irg);
2027         for (i = get_Block_n_cfgpreds(endblk) - 1; i >= 0; --i) {
2028                 ir_node *pred = get_Block_cfgpred(endblk, i);
2029
2030                 pred = skip_Proj(pred);
2031                 if (is_Return(pred)) {
2032                         dfs(get_Return_mem(pred), env);
2033                 } else if (is_Raise(pred)) {
2034                         dfs(get_Raise_mem(pred), env);
2035                 } else if (is_fragile_op(pred)) {
2036                         dfs(get_memop_mem(pred), env);
2037                 } else if (is_Bad(pred)) {
2038                         /* ignore non-optimized block predecessor */
2039                 } else {
2040                         assert(0 && "Unknown EndBlock predecessor");
2041                 }
2042         }
2043
2044         /* visit the keep-alives */
2045         end = get_irg_end(irg);
2046         for (i = get_End_n_keepalives(end) - 1; i >= 0; --i) {
2047                 ir_node *ka = get_End_keepalive(end, i);
2048
2049                 if (is_Phi(ka) && !irn_visited(ka))
2050                         dfs(ka, env);
2051         }
2052 }  /* do_dfs */
2053
2054 /**
2055  * Optimize Loads/Stores in loops.
2056  *
2057  * @param irg  the graph
2058  */
2059 static int optimize_loops(ir_graph *irg)
2060 {
2061         loop_env env;
2062
2063         env.stack         = NEW_ARR_F(ir_node *, 128);
2064         env.tos           = 0;
2065         env.nextDFSnum    = 0;
2066         env.POnum         = 0;
2067         env.changes       = 0;
2068         ir_nodehashmap_init(&env.map);
2069         obstack_init(&env.obst);
2070
2071         /* calculate the SCC's and drive loop optimization. */
2072         do_dfs(irg, &env);
2073
2074         DEL_ARR_F(env.stack);
2075         obstack_free(&env.obst, NULL);
2076         ir_nodehashmap_destroy(&env.map);
2077
2078         return env.changes;
2079 }  /* optimize_loops */
2080
2081 /*
2082  * do the load store optimization
2083  */
2084 static ir_graph_properties_t do_loadstore_opt(ir_graph *irg)
2085 {
2086         walk_env_t env;
2087         ir_graph_properties_t res = 0;
2088
2089         FIRM_DBG_REGISTER(dbg, "firm.opt.ldstopt");
2090
2091         assert(get_irg_phase_state(irg) != phase_building);
2092         assert(get_irg_pinned(irg) != op_pin_state_floats &&
2093                 "LoadStore optimization needs pinned graph");
2094
2095         if (get_opt_alias_analysis()) {
2096                 assure_irp_globals_entity_usage_computed();
2097         }
2098
2099         obstack_init(&env.obst);
2100         env.changes = 0;
2101
2102         /* init the links, then collect Loads/Stores/Proj's in lists */
2103         master_visited = 0;
2104         irg_walk_graph(irg, firm_clear_link, collect_nodes, &env);
2105
2106         /* now we have collected enough information, optimize */
2107         irg_walk_graph(irg, NULL, do_load_store_optimize, &env);
2108
2109         env.changes |= optimize_loops(irg);
2110
2111         obstack_free(&env.obst, NULL);
2112
2113         /* Handle graph state */
2114         if (env.changes) {
2115                 edges_deactivate(irg);
2116         }
2117
2118         if (!(env.changes & CF_CHANGED)) {
2119                 res |= IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE | IR_GRAPH_PROPERTY_NO_BADS;
2120         }
2121
2122         return res;
2123 }
2124
/** Pass description: name, required graph properties and entry point. */
static optdesc_t opt_loadstore = {
	"load-store",
	IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE | IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE,
	do_loadstore_opt,
};
2130
2131 int optimize_load_store(ir_graph *irg)
2132 {
2133         perform_irg_optimization(irg, &opt_loadstore);
2134         return 1;
2135 }
2136
2137 ir_graph_pass_t *optimize_load_store_pass(const char *name)
2138 {
2139         return def_graph_pass_ret(name ? name : "ldst", optimize_load_store);
2140 }  /* optimize_load_store_pass */