added a callback function to check whether a given entity is an allocation call
[libfirm] / ir / opt / ldstopt.c
/*
 * Copyright (C) 1995-2007 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   Load/Store optimizations.
 * @author  Michael Beck
 * @version $Id$
 */
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#ifdef HAVE_STRING_H
# include <string.h>
#endif

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irmode_t.h"
#include "iropt_t.h"
#include "ircons_t.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "irvrfy.h"
#include "tv_t.h"
#include "dbginfo_t.h"
#include "iropt_dbg.h"
#include "irflag_t.h"
#include "array.h"
#include "irhooks.h"
#include "iredges.h"
#include "irtools.h"
#include "opt_polymorphy.h"
#include "irmemory.h"
#include "xmalloc.h"

#ifdef DO_CACHEOPT
#include "cacheopt/cachesim.h"
#endif

#undef IMAX
#define IMAX(a,b)       ((a) > (b) ? (a) : (b))

#define MAX_PROJ        IMAX(pn_Load_max, pn_Store_max)

enum changes_t {
        DF_CHANGED = 1,       /**< data flow changed */
        CF_CHANGED = 2,       /**< control flow changed */
};

/**
 * walker environment
 */
typedef struct _walk_env_t {
        struct obstack obst;          /**< list of all stores */
        unsigned changes;             /**< a bitmask of graph changes */
} walk_env_t;

/**
 * flags for Load/Store
 */
enum ldst_flags_t {
        LDST_VISITED = 1              /**< if set, this Load/Store is already visited */
};

/** A Load/Store info. */
typedef struct _ldst_info_t {
        ir_node  *projs[MAX_PROJ];    /**< list of Proj's of this node */
        ir_node  *exc_block;          /**< the exception block if available */
        int      exc_idx;             /**< predecessor index in the exception block */
        unsigned flags;               /**< flags */
        unsigned visited;             /**< visited counter for breaking loops */
} ldst_info_t;

/**
 * flags for control flow.
 */
enum block_flags_t {
        BLOCK_HAS_COND = 1,      /**< Block has conditional control flow */
        BLOCK_HAS_EXC  = 2       /**< Block has exceptional control flow */
};

/**
 * a Block info.
 */
typedef struct _block_info_t {
        unsigned flags;               /**< flags for the block */
} block_info_t;

/** the master visited flag for loop detection. */
static unsigned master_visited = 0;

#define INC_MASTER()       ++master_visited
#define MARK_NODE(info)    (info)->visited = master_visited
#define NODE_VISITED(info) (info)->visited >= master_visited
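
/*
 * Editor's note (a sketch, not part of the original source): the counter
 * scheme above avoids re-clearing visited flags before every chain walk.
 * Each call to INC_MASTER() starts a fresh generation; a node counts as
 * visited only if its stored counter reaches the current generation:
 *
 *     INC_MASTER();             // new walk, all nodes implicitly unvisited
 *     if (NODE_VISITED(info))   // info->visited >= master_visited?
 *             break;            // cycle detected
 *     MARK_NODE(info);          // info->visited = master_visited
 */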

/**
 * get the Load/Store info of a node
 */
static ldst_info_t *get_ldst_info(ir_node *node, walk_env_t *env) {
        ldst_info_t *info = get_irn_link(node);

        if (! info) {
                info = obstack_alloc(&env->obst, sizeof(*info));
                memset(info, 0, sizeof(*info));
                set_irn_link(node, info);
        }
        return info;
}  /* get_ldst_info */

/**
 * get the Block info of a node
 */
static block_info_t *get_block_info(ir_node *node, walk_env_t *env) {
        block_info_t *info = get_irn_link(node);

        if (! info) {
                info = obstack_alloc(&env->obst, sizeof(*info));
                memset(info, 0, sizeof(*info));
                set_irn_link(node, info);
        }
        return info;
}  /* get_block_info */

/**
 * update the projection info for a Load/Store
 */
static unsigned update_projs(ldst_info_t *info, ir_node *proj)
{
        long nr = get_Proj_proj(proj);

        assert(0 <= nr && nr <= MAX_PROJ && "Wrong proj from LoadStore");

        if (info->projs[nr]) {
                /* there is already one, do CSE */
                exchange(proj, info->projs[nr]);
                return DF_CHANGED;
        }
        else {
                info->projs[nr] = proj;
                return 0;
        }
}  /* update_projs */
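
/*
 * Editor's note (a sketch, not part of the original source): update_projs()
 * folds duplicate Projs of one Load/Store.  If a Load already has a Proj
 * registered for pn_Load_res and the walker meets a second one, the second
 * is exchanged for the first, so every (node, proj-number) pair keeps a
 * single representative for the rest of the pass.
 */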

/**
 * update the exception block info for a Load/Store node.
 *
 * @param info   the load/store info struct
 * @param block  the exception handler block for this load/store
 * @param pos    the control flow input of the block
 */
static unsigned update_exc(ldst_info_t *info, ir_node *block, int pos)
{
        assert(info->exc_block == NULL && "more than one exception block found");

        info->exc_block = block;
        info->exc_idx   = pos;
        return 0;
}  /* update_exc */

/** Return the number of uses of an address node */
#define get_irn_n_uses(adr)     get_irn_n_edges(adr)

/**
 * walker, collects all Load/Store/Proj nodes
 *
 * walks from Start -> End
 */
static void collect_nodes(ir_node *node, void *env)
{
        ir_op       *op = get_irn_op(node);
        ir_node     *pred, *blk, *pred_blk;
        ldst_info_t *ldst_info;
        walk_env_t  *wenv = env;

        if (op == op_Proj) {
                ir_node *adr;
                ir_op *op;

                pred = get_Proj_pred(node);
                op   = get_irn_op(pred);

                if (op == op_Load) {
                        ldst_info = get_ldst_info(pred, wenv);

                        wenv->changes |= update_projs(ldst_info, node);

                        if ((ldst_info->flags & LDST_VISITED) == 0) {
                                adr = get_Load_ptr(pred);
                                ldst_info->flags |= LDST_VISITED;
                        }

                        /*
                         * Place the Proj's in the same block as the
                         * predecessor Load. This is always ok and prevents
                         * "non-SSA" form after optimizations if the Proj
                         * is in a wrong block.
                         */
                        blk      = get_nodes_block(node);
                        pred_blk = get_nodes_block(pred);
                        if (blk != pred_blk) {
                                wenv->changes |= DF_CHANGED;
                                set_nodes_block(node, pred_blk);
                        }
                } else if (op == op_Store) {
                        ldst_info = get_ldst_info(pred, wenv);

                        wenv->changes |= update_projs(ldst_info, node);

                        if ((ldst_info->flags & LDST_VISITED) == 0) {
                                adr = get_Store_ptr(pred);
                                ldst_info->flags |= LDST_VISITED;
                        }

                        /*
                         * Place the Proj's in the same block as the
                         * predecessor Store. This is always ok and prevents
                         * "non-SSA" form after optimizations if the Proj
                         * is in a wrong block.
                         */
                        blk      = get_nodes_block(node);
                        pred_blk = get_nodes_block(pred);
                        if (blk != pred_blk) {
                                wenv->changes |= DF_CHANGED;
                                set_nodes_block(node, pred_blk);
                        }
                }
        } else if (op == op_Block) {
                int i;

                for (i = get_Block_n_cfgpreds(node) - 1; i >= 0; --i) {
                        ir_node      *pred_block;
                        block_info_t *bl_info;

                        pred = skip_Proj(get_Block_cfgpred(node, i));

                        /* ignore Bad predecessors, they will be removed later */
                        if (is_Bad(pred))
                                continue;

                        pred_block = get_nodes_block(pred);
                        bl_info    = get_block_info(pred_block, wenv);

                        if (is_fragile_op(pred))
                                bl_info->flags |= BLOCK_HAS_EXC;
                        else if (is_irn_forking(pred))
                                bl_info->flags |= BLOCK_HAS_COND;

                        if (get_irn_op(pred) == op_Load || get_irn_op(pred) == op_Store) {
                                ldst_info = get_ldst_info(pred, wenv);

                                wenv->changes |= update_exc(ldst_info, node, i);
                        }
                }
        }
}  /* collect_nodes */

/**
 * Returns an entity if the address ptr points to a constant one.
 *
 * @param ptr  the address
 *
 * @return an entity or NULL
 */
static ir_entity *find_constant_entity(ir_node *ptr)
{
        for (;;) {
                ir_op *op = get_irn_op(ptr);

                if (op == op_SymConst && (get_SymConst_kind(ptr) == symconst_addr_ent)) {
                        ir_entity *ent = get_SymConst_entity(ptr);
                        if (variability_constant == get_entity_variability(ent))
                                return ent;
                        return NULL;
                } else if (op == op_Sel) {
                        ir_entity *ent = get_Sel_entity(ptr);
                        ir_type   *tp  = get_entity_owner(ent);

                        /* Do not fiddle with polymorphism. */
                        if (is_Class_type(get_entity_owner(ent)) &&
                                ((get_entity_n_overwrites(ent)    != 0) ||
                                (get_entity_n_overwrittenby(ent) != 0)   ) )
                                return NULL;

                        if (is_Array_type(tp)) {
                                /* check bounds */
                                int i, n;

                                for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
                                        ir_node *bound;
                                        tarval *tlower, *tupper;
                                        ir_node *index = get_Sel_index(ptr, i);
                                        tarval *tv     = computed_value(index);

                                        /* check if the index is constant */
                                        if (tv == tarval_bad)
                                                return NULL;

                                        bound  = get_array_lower_bound(tp, i);
                                        tlower = computed_value(bound);
                                        bound  = get_array_upper_bound(tp, i);
                                        tupper = computed_value(bound);

                                        if (tlower == tarval_bad || tupper == tarval_bad)
                                                return NULL;

                                        if (tarval_cmp(tv, tlower) & pn_Cmp_Lt)
                                                return NULL;
                                        if (tarval_cmp(tupper, tv) & pn_Cmp_Lt)
                                                return NULL;

                                        /* ok, bounds check finished */
                                }
                        }

                        if (variability_constant == get_entity_variability(ent))
                                return ent;

                        /* try next */
                        ptr = get_Sel_ptr(ptr);
                } else
                        return NULL;
        }
}  /* find_constant_entity */
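
/*
 * Editor's note (a sketch, not part of the original source): in C terms the
 * walk above recognizes addresses such as
 *
 *     static const int tab[4] = { 1, 2, 3, 4 };
 *     ... tab[2] ...     // SymConst(tab) plus a constant, in-bounds index
 *
 * and returns the entity for tab, while any variable index, out-of-bounds
 * access, or polymorphic class member makes it give up and return NULL.
 */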

/**
 * Returns the array index of the Sel node n in dimension dim as a long.
 */
static long get_Sel_array_index_long(ir_node *n, int dim) {
        ir_node *index = get_Sel_index(n, dim);
        assert(get_irn_op(index) == op_Const);
        return get_tarval_long(get_Const_tarval(index));
}  /* get_Sel_array_index_long */

/**
 * Returns the accessed component graph path for a
 * node computing an address.
 *
 * @param ptr    the node computing the address
 * @param depth  current depth in steps upward from the root
 *               of the address
 */
static compound_graph_path *rec_get_accessed_path(ir_node *ptr, int depth) {
        compound_graph_path *res = NULL;
        ir_entity           *root, *field;
        int                 path_len, pos;

        if (get_irn_op(ptr) == op_SymConst) {
                /* a SymConst. If the depth is 0, this is an access to a global
                 * entity and we don't need a component path, else we know
                 * at least its length.
                 */
                assert(get_SymConst_kind(ptr) == symconst_addr_ent);
                root = get_SymConst_entity(ptr);
                res = (depth == 0) ? NULL : new_compound_graph_path(get_entity_type(root), depth);
        } else {
                assert(get_irn_op(ptr) == op_Sel);
                /* it's a Sel, go up until we find the root */
                res = rec_get_accessed_path(get_Sel_ptr(ptr), depth+1);

                /* fill up the step in the path at the current position */
                field    = get_Sel_entity(ptr);
                path_len = get_compound_graph_path_length(res);
                pos      = path_len - depth - 1;
                set_compound_graph_path_node(res, pos, field);

                if (is_Array_type(get_entity_owner(field))) {
                        assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
                        set_compound_graph_path_array_index(res, pos, get_Sel_array_index_long(ptr, 0));
                }
        }
        return res;
}  /* rec_get_accessed_path */
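
/*
 * Editor's note (a sketch, not part of the original source): for a C access
 * like
 *
 *     struct S { int a[4]; } s;
 *     ... s.a[3] ...
 *
 * the address is Sel(Sel(SymConst(s), a), 3); the recursion walks up to the
 * SymConst (depth 2), allocates a path of that length, and fills it in on
 * the way back down, yielding the component path s -> a -> [3].
 */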

/** Returns an access path or NULL.  The access path is only
 *  valid if the graph is in phase_high and _no_ address computation is used.
 */
static compound_graph_path *get_accessed_path(ir_node *ptr) {
        return rec_get_accessed_path(ptr, 0);
}  /* get_accessed_path */

/* forward */
static void reduce_adr_usage(ir_node *ptr);

/**
 * Update a Load that may have lost its usage.
 */
static void handle_load_update(ir_node *load) {
        ldst_info_t *info = get_irn_link(load);

        /* do NOT touch volatile loads for now */
        if (get_Load_volatility(load) == volatility_is_volatile)
                return;

        if (! info->projs[pn_Load_res] && ! info->projs[pn_Load_X_except]) {
                ir_node *ptr = get_Load_ptr(load);
                ir_node *mem = get_Load_mem(load);

                /* a Load whose value is neither used nor exception-checked, remove it */
                exchange(info->projs[pn_Load_M], mem);
                exchange(load, new_Bad());
                reduce_adr_usage(ptr);
        }
}  /* handle_load_update */

/**
 * A use of an address node has vanished. Check if it was a Proj
 * node and update the counters.
 */
static void reduce_adr_usage(ir_node *ptr) {
        if (is_Proj(ptr)) {
                if (get_irn_n_edges(ptr) <= 0) {
                        /* this Proj is dead now */
                        ir_node *pred = get_Proj_pred(ptr);

                        if (is_Load(pred)) {
                                ldst_info_t *info = get_irn_link(pred);
                                info->projs[get_Proj_proj(ptr)] = NULL;

                                /* this node lost its result Proj, handle that */
                                handle_load_update(pred);
                        }
                }
        }
}  /* reduce_adr_usage */
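
/*
 * Editor's note (a sketch, not part of the original source): removing one
 * dead node can cascade.  If the address of a removed Load was itself the
 * result Proj of another Load, that Proj may now be edge-less;
 * reduce_adr_usage() clears it from the producer's info and
 * handle_load_update() may then remove the producer Load as well.
 */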

/**
 * Check, if an already existing value of mode old_mode can be converted
 * into the needed one new_mode without loss.
 */
static int can_use_stored_value(ir_mode *old_mode, ir_mode *new_mode) {
        if (old_mode == new_mode)
                return 1;

        /* if both modes are two's-complement ones, we can always convert the
           stored value into the needed one. */
        if (get_mode_size_bits(old_mode) >= get_mode_size_bits(new_mode) &&
                  get_mode_arithmetic(old_mode) == irma_twos_complement &&
                  get_mode_arithmetic(new_mode) == irma_twos_complement)
                return 1;
        return 0;
}  /* can_use_stored_value */
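
/*
 * Editor's note (a sketch, not part of the original source): the typical
 * case this check allows is a narrowing reuse of an integer value, e.g.
 *
 *     int x = ...;
 *     *p = x;                   // Store in a 32-bit integer mode
 *     short s = *(short *)p;    // Load in a 16-bit integer mode
 *
 * Both modes use two's-complement arithmetic and the stored value is at
 * least as wide as the loaded one, so the pass can reuse x, converted to
 * the narrower mode via a Conv node.
 */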

/**
 * Follow the memory chain as long as there are only Loads
 * and alias free Stores and try to replace the current Load or Store
 * by a previous one.
 * Note that in unreachable loops it might happen that we reach the
 * load again, as well as we can fall into a cycle.
 * We break such cycles using a special visited flag.
 *
 * INC_MASTER() must be called before diving in.
 */
static unsigned follow_Mem_chain(ir_node *load, ir_node *curr) {
        unsigned res = 0;
        ldst_info_t *info = get_irn_link(load);
        ir_node *pred;
        ir_node *ptr       = get_Load_ptr(load);
        ir_node *mem       = get_Load_mem(load);
        ir_mode *load_mode = get_Load_mode(load);

        for (pred = curr; load != pred; ) {
                ldst_info_t *pred_info = get_irn_link(pred);

                /*
                 * BEWARE: one might think that checking the modes is useless, because
                 * if the pointers are identical, they refer to the same object.
                 * This is only true in strongly typed languages, not in C where the
                 * following is possible: a = *(ir_type1 *)p; b = *(ir_type2 *)p ...
                 */
                if (get_irn_op(pred) == op_Store && get_Store_ptr(pred) == ptr &&
                    can_use_stored_value(get_irn_mode(get_Store_value(pred)), load_mode)) {
                        /*
                         * a Load immediately after a Store -- a read after write.
                         * We may remove the Load if neither the Load nor the Store has an
                         * exception handler OR they are in the same block. In the latter
                         * case the Load cannot throw an exception when the previous Store
                         * was quiet.
                         *
                         * Why do we need to check for the Store exception? If the Store
                         * cannot be executed (ROM) the exception handler might simply jump
                         * into the load block :-(
                         * We could make it a little bit better if we would know that the
                         * exception handler of the Store jumps directly to the end...
                         */
                        if ((pred_info->projs[pn_Store_X_except] == NULL && info->projs[pn_Load_X_except] == NULL) ||
                            get_nodes_block(load) == get_nodes_block(pred)) {
                                ir_node *value = get_Store_value(pred);

                                DBG_OPT_RAW(load, value);

                                /* add a Conv if needed */
                                if (get_irn_mode(get_Store_value(pred)) != load_mode) {
                                        value = new_r_Conv(current_ir_graph, get_nodes_block(load), value, load_mode);
                                }

                                if (info->projs[pn_Load_M])
                                        exchange(info->projs[pn_Load_M], mem);

                                /* no exception */
                                if (info->projs[pn_Load_X_except]) {
                                        exchange( info->projs[pn_Load_X_except], new_Bad());
                                        res |= CF_CHANGED;
                                }

                                if (info->projs[pn_Load_res])
                                        exchange(info->projs[pn_Load_res], value);

                                exchange(load, new_Bad());
                                reduce_adr_usage(ptr);
                                return res | DF_CHANGED;
                        }
                } else if (get_irn_op(pred) == op_Load && get_Load_ptr(pred) == ptr &&
                           can_use_stored_value(get_Load_mode(pred), load_mode)) {
                        /*
                         * a Load after a Load -- a read after read.
                         * We may remove the second Load if it does not have an exception
                         * handler OR they are in the same block. In the latter case the
                         * Load cannot throw an exception when the previous Load was quiet.
                         *
                         * Here, there is no need to check if the previous Load has an
                         * exception handler because they would have exactly the same
                         * exception...
                         */
                        if (info->projs[pn_Load_X_except] == NULL || get_nodes_block(load) == get_nodes_block(pred)) {
                                ir_node *value;

                                DBG_OPT_RAR(load, pred);

                                /* the result is used */
                                if (info->projs[pn_Load_res]) {
                                        if (pred_info->projs[pn_Load_res] == NULL) {
                                                /* create a new Proj again */
                                                pred_info->projs[pn_Load_res] = new_r_Proj(current_ir_graph, get_nodes_block(pred), pred, get_Load_mode(pred), pn_Load_res);
                                        }
                                        value = pred_info->projs[pn_Load_res];

                                        /* add a Conv if needed */
                                        if (get_Load_mode(pred) != load_mode) {
                                                value = new_r_Conv(current_ir_graph, get_nodes_block(load), value, load_mode);
                                        }

                                        exchange(info->projs[pn_Load_res], value);
                                }

                                if (info->projs[pn_Load_M])
                                        exchange(info->projs[pn_Load_M], mem);

                                /* no exception */
                                if (info->projs[pn_Load_X_except]) {
                                        exchange(info->projs[pn_Load_X_except], new_Bad());
                                        res |= CF_CHANGED;
                                }

                                exchange(load, new_Bad());
                                reduce_adr_usage(ptr);
                                return res | DF_CHANGED;
                        }
                }

                if (get_irn_op(pred) == op_Store) {
                        /* check if we can pass through this store */
                        ir_alias_relation rel = get_alias_relation(
                                current_ir_graph,
                                get_Store_ptr(pred),
                                get_irn_mode(get_Store_value(pred)),
                                ptr, load_mode);
                        /* if there might be an alias, we cannot pass this Store */
                        if (rel != no_alias)
                                break;
                        pred = skip_Proj(get_Store_mem(pred));
                } else if (get_irn_op(pred) == op_Load) {
                        pred = skip_Proj(get_Load_mem(pred));
                } else {
                        /* follow only Load/Store chains */
                        break;
                }

                /* check for cycles */
                if (NODE_VISITED(pred_info))
                        break;
                MARK_NODE(pred_info);
        }

        if (get_irn_op(pred) == op_Sync) {
                int i;

                /* handle all Sync predecessors */
                for (i = get_Sync_n_preds(pred) - 1; i >= 0; --i) {
                        res |= follow_Mem_chain(load, skip_Proj(get_Sync_pred(pred, i)));
                        if (res)
                                break;
                }
        }

        return res;
}  /* follow_Mem_chain */
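
/*
 * Editor's note (a sketch, not part of the original source): at the C level
 * the two patterns removed above are
 *
 *     *p = x;  y = *p;      =>  *p = x;  y = x;      (read after write)
 *     a  = *p; b = *p;      =>  a  = *p; b = a;      (read after read)
 *
 * where the memory chain between the two accesses contains only Loads and
 * Stores that provably do not alias p.
 */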

/**
 * optimize a Load
 *
 * @param load  the Load node
 */
static unsigned optimize_load(ir_node *load)
{
        ldst_info_t *info = get_irn_link(load);
        ir_node *mem, *ptr, *new_node;
        ir_entity *ent;
        unsigned res = 0;

        /* do NOT touch volatile loads for now */
        if (get_Load_volatility(load) == volatility_is_volatile)
                return 0;

        /* the address of the load to be optimized */
        ptr = get_Load_ptr(load);

        /*
         * Check if we can remove the exception from a Load:
         * This can be done, if the address is from a Sel(Alloc) and
         * the Sel type is a subtype of the allocated type.
         *
         * This optimizes some often used OO constructs,
         * like x = new O; x->t;
         */
        if (info->projs[pn_Load_X_except]) {
                if (is_Sel(ptr)) {
                        ir_node *mem = get_Sel_mem(ptr);

                        /* FIXME: works with the current FE, but better use the base */
                        if (get_irn_op(skip_Proj(mem)) == op_Alloc) {
                                /* ok, check the types */
                                ir_entity *ent    = get_Sel_entity(ptr);
                                ir_type   *s_type = get_entity_type(ent);
                                ir_type   *a_type = get_Alloc_type(mem);

                                if (is_SubClass_of(s_type, a_type)) {
                                        /* ok, condition met: there can't be an exception because
                                         * Alloc guarantees that enough memory was allocated */

                                        exchange(info->projs[pn_Load_X_except], new_Bad());
                                        info->projs[pn_Load_X_except] = NULL;
                                        res |= CF_CHANGED;
                                }
                        }
                } else if ((get_irn_op(skip_Proj(ptr)) == op_Alloc) ||
                        ((get_irn_op(ptr) == op_Cast) && (get_irn_op(skip_Proj(get_Cast_op(ptr))) == op_Alloc))) {
                                /* simple case: a direct load after an Alloc. Firm's Alloc throws
                                 * an exception in case of out-of-memory, so there is no way for an
                                 * exception in this load.
                                 * This code is constructed by the "exception lowering" in the Jack compiler.
                                 */
                                exchange(info->projs[pn_Load_X_except], new_Bad());
                                info->projs[pn_Load_X_except] = NULL;
                                res |= CF_CHANGED;
                }
        }

        /* The mem of the Load. Must still be returned after optimization. */
        mem  = get_Load_mem(load);

        if (! info->projs[pn_Load_res] && ! info->projs[pn_Load_X_except]) {
                /* a Load whose value is neither used nor exception-checked, remove it */
                exchange(info->projs[pn_Load_M], mem);

                exchange(load, new_Bad());
                reduce_adr_usage(ptr);
                return res | DF_CHANGED;
        }

        /* Load from a constant polymorphic field, where we can resolve
           polymorphism. */
        new_node = transform_node_Load(load);
        if (new_node != load) {
                if (info->projs[pn_Load_M]) {
                        exchange(info->projs[pn_Load_M], mem);
                        info->projs[pn_Load_M] = NULL;
                }
                if (info->projs[pn_Load_X_except]) {
                        exchange(info->projs[pn_Load_X_except], new_Bad());
                        info->projs[pn_Load_X_except] = NULL;
                }
                if (info->projs[pn_Load_res])
                        exchange(info->projs[pn_Load_res], new_node);

                exchange(load, new_Bad());
                reduce_adr_usage(ptr);
                return res | DF_CHANGED;
        }

        /* check if we can determine the entity that will be loaded */
        ent = find_constant_entity(ptr);
        if (ent) {
                if ((allocation_static == get_entity_allocation(ent)) &&
                        (visibility_external_allocated != get_entity_visibility(ent))) {
                        /* a static allocation that is not external: there should be NO
                         * exception when loading. */

                        /* no exception, clear the info field as it might be checked later again */
                        if (info->projs[pn_Load_X_except]) {
                                exchange(info->projs[pn_Load_X_except], new_Bad());
                                info->projs[pn_Load_X_except] = NULL;
                                res |= CF_CHANGED;
                        }

                        if (variability_constant == get_entity_variability(ent)
                                && is_atomic_entity(ent)) {
                                /* Might not be atomic after lowering of Sels.  In this
                                   case we could also load, but it's more complicated. */
                                /* simpler case: we load the content of a constant value:
                                 * replace it by the constant itself
                                 */

                                /* no memory */
                                if (info->projs[pn_Load_M]) {
                                        exchange(info->projs[pn_Load_M], mem);
                                        res |= DF_CHANGED;
                                }
                                /* no result :-) */
                                if (info->projs[pn_Load_res]) {
                                        if (is_atomic_entity(ent)) {
                                                ir_node *c = copy_const_value(get_irn_dbg_info(load), get_atomic_ent_value(ent));

                                                DBG_OPT_RC(load, c);
                                                exchange(info->projs[pn_Load_res], c);
                                                res |= DF_CHANGED;
                                        }
                                }
                                exchange(load, new_Bad());
                                reduce_adr_usage(ptr);
                                return res;
                        } else if (variability_constant == get_entity_variability(ent)) {
                                compound_graph_path *path = get_accessed_path(ptr);

                                if (path) {
                                        ir_node *c;

                                        assert(is_proper_compound_graph_path(path, get_compound_graph_path_length(path)-1));
                                        /*
                                        {
                                                int j;
                                                for (j = 0; j < get_compound_graph_path_length(path); ++j) {
                                                        ir_entity *node = get_compound_graph_path_node(path, j);
                                                        fprintf(stdout, ".%s", get_entity_name(node));
                                                        if (is_Array_type(get_entity_owner(node)))
                                                                fprintf(stdout, "[%d]", get_compound_graph_path_array_index(path, j));
                                                }
                                                printf("\n");
                                        }
                                        */

                                        c = get_compound_ent_value_by_path(ent, path);
                                        free_compound_graph_path(path);

                                        /* printf("  cons: "); DDMN(c); */

                                        if (info->projs[pn_Load_M]) {
                                                exchange(info->projs[pn_Load_M], mem);
                                                res |= DF_CHANGED;
                                        }
                                        if (info->projs[pn_Load_res]) {
                                                exchange(info->projs[pn_Load_res], copy_const_value(get_irn_dbg_info(load), c));
                                                res |= DF_CHANGED;
                                        }
                                        exchange(load, new_Bad());
                                        reduce_adr_usage(ptr);
                                        return res;
                                } else {
                                        /* We cannot determine a correct access path.  E.g., in jack, we load
                                           a byte from an object to generate an exception.  Happens in test program
                                           Reflectiontest.
                                        printf(">>>>>>>>>>>>> Found access to constant entity %s in function %s\n", get_entity_name(ent),
                                        get_entity_name(get_irg_entity(current_ir_graph)));
                                        printf("  load: "); DDMN(load);
                                        printf("  ptr:  "); DDMN(ptr);
                                        */
                                }
                        }
                }
        }

        /* Check if the address of this load is used more than once.
         * If not, this load cannot be removed in any case. */
        if (get_irn_n_uses(ptr) <= 1)
                return res;

        /*
         * follow the memory chain as long as there are only Loads and
         * alias free Stores and try to replace the current Load or Store
         * by a previous one.
         * Note that in unreachable loops it might happen that we reach the
         * load again, as well as we can fall into a cycle.
         * We break such cycles using a special visited flag.
         */
        INC_MASTER();
        res = follow_Mem_chain(load, skip_Proj(mem));
        return res;
}  /* optimize_load */

/**
 * Check whether a value of mode new_mode would completely overwrite a value
 * of mode old_mode in memory.
 */
static int is_completely_overwritten(ir_mode *old_mode, ir_mode *new_mode)
{
        return get_mode_size_bits(new_mode) >= get_mode_size_bits(old_mode);
}  /* is_completely_overwritten */
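
/*
 * Editor's note (a sketch, not part of the original source): a 32-bit store
 * over an earlier 16-bit store to the same address makes the old value dead:
 *
 *     *(short *)p = s;     // 16-bit Store, completely overwritten below
 *     *(int *)p   = i;     // 32-bit Store
 *
 * The reverse order would keep 16 bits of the old value alive, which is why
 * the check compares the mode sizes.
 */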

/**
 * Follow the memory chain as long as there are only Loads and alias free
 * Stores.
 *
 * INC_MASTER() must be called before diving in.
 */
static unsigned follow_Mem_chain_for_Store(ir_node *store, ir_node *curr) {
        unsigned res = 0;
        ldst_info_t *info = get_irn_link(store);
        ir_node *pred;
        ir_node *ptr = get_Store_ptr(store);
        ir_node *mem = get_Store_mem(store);
        ir_node *value = get_Store_value(store);
        ir_mode *mode  = get_irn_mode(value);
        ir_node *block = get_nodes_block(store);

        for (pred = curr; pred != store;) {
                ldst_info_t *pred_info = get_irn_link(pred);

                /*
                 * BEWARE: one might think that checking the modes is useless, because
                 * if the pointers are identical, they refer to the same object.
                 * This is only true in strongly typed languages, not in C where the
                 * following is possible: *(ir_type1 *)p = a; *(ir_type2 *)p = b ...
                 * However, if the mode that is written has a size greater than or
                 * equal to the old one, the old value is completely overwritten and
                 * can be killed ...
                 */
                if (get_irn_op(pred) == op_Store && get_Store_ptr(pred) == ptr &&
                    get_nodes_block(pred) == block &&
                    is_completely_overwritten(get_irn_mode(get_Store_value(pred)), mode)) {
                        /*
                         * a Store after a Store in the same block -- a write after write.
                         * We may remove the first Store, if it does not have an exception handler.
                         *
                         * TODO: What, if both have the same exception handler ???
                         */
                        if (get_Store_volatility(pred) != volatility_is_volatile && !pred_info->projs[pn_Store_X_except]) {
                                DBG_OPT_WAW(pred, store);
                                exchange( pred_info->projs[pn_Store_M], get_Store_mem(pred) );
                                exchange(pred, new_Bad());
                                reduce_adr_usage(ptr);
                                return DF_CHANGED;
                        }
                } else if (get_irn_op(pred) == op_Load && get_Load_ptr(pred) == ptr &&
                           value == pred_info->projs[pn_Load_res]) {
                        /*
                         * a Store of a value just loaded from the same address -- a write
                         * after read.
                         * We may remove this Store, if it does not have an exception handler.
                         */
                        if (! info->projs[pn_Store_X_except]) {
                                DBG_OPT_WAR(store, pred);
                                exchange( info->projs[pn_Store_M], mem );
                                exchange(store, new_Bad());
                                reduce_adr_usage(ptr);
                                return DF_CHANGED;
                        }
                }

                if (get_irn_op(pred) == op_Store) {
                        /* check if we can pass through this store */
                        ir_alias_relation rel = get_alias_relation(
                                current_ir_graph,
                                get_Store_ptr(pred),
                                get_irn_mode(get_Store_value(pred)),
                                ptr, mode);
                        /* if there might be an alias, we cannot pass this Store */
                        if (rel != no_alias)
                                break;
                        pred = skip_Proj(get_Store_mem(pred));
                } else if (get_irn_op(pred) == op_Load) {
                        pred = skip_Proj(get_Load_mem(pred));
                } else {
                        /* follow only Load/Store chains */
                        break;
                }

                /* check for cycles */
                if (NODE_VISITED(pred_info))
                        break;
                MARK_NODE(pred_info);
        }

        if (get_irn_op(pred) == op_Sync) {
                int i;

                /* handle all Sync predecessors */
                for (i = get_Sync_n_preds(pred) - 1; i >= 0; --i) {
                        res |= follow_Mem_chain_for_Store(store, skip_Proj(get_Sync_pred(pred, i)));
                        if (res)
                                break;
                }
        }
        return res;
}  /* follow_Mem_chain_for_Store */
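
/*
 * Editor's note (a sketch, not part of the original source): at the C level
 * the two patterns removed above are
 *
 *     *p = a; *p = b;      =>  *p = b;       (write after write, same block)
 *     x = *p; *p = x;      =>  x = *p;       (write after read)
 *
 * again restricted to memory chains of Loads and provably non-aliasing
 * Stores.
 */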

/**
 * optimize a Store
 *
 * @param store  the Store node
 */
static unsigned optimize_store(ir_node *store) {
        ir_node *ptr, *mem;

        if (get_Store_volatility(store) == volatility_is_volatile)
                return 0;

        ptr = get_Store_ptr(store);

        /* Check if the address of this Store is used more than once.
         * If not, this Store cannot be removed in any case. */
        if (get_irn_n_uses(ptr) <= 1)
                return 0;

        mem = get_Store_mem(store);

        /* follow the memory chain as long as there are only Loads and
           alias free Stores */
        INC_MASTER();
        return follow_Mem_chain_for_Store(store, skip_Proj(mem));
}  /* optimize_store */

/**
 * walker, optimizes Phi after Stores to identical places:
 * Does the following optimization:
 * @verbatim
 *
 *   val1   val2   val3          val1  val2  val3
 *    |      |      |               \    |    /
 *  Store  Store  Store              \   |   /
 *      \    |    /                   PhiData
 *       \   |   /                       |
 *        \  |  /                      Store
 *          PhiM
 *
 * @endverbatim
 * This reduces the number of stores and allows for predicated execution.
 * It moves Stores back to the end of a function, which may be bad.
 *
 * This is only possible if the predecessor blocks have only one successor.
 */
static unsigned optimize_phi(ir_node *phi, walk_env_t *wenv)
{
        int i, n;
        ir_node *store, *old_store, *ptr, *block, *phi_block, *phiM, *phiD, *exc, *projM;
        ir_mode *mode;
        ir_node **inM, **inD, **stores;
        int *idx;
        dbg_info *db = NULL;
        ldst_info_t *info;
        block_info_t *bl_info;
        unsigned res = 0;

        /* Must be a memory Phi */
        if (get_irn_mode(phi) != mode_M)
                return 0;

        n = get_Phi_n_preds(phi);
        if (n <= 0)
                return 0;

        store = skip_Proj(get_Phi_pred(phi, 0));
        old_store = store;
        if (get_irn_op(store) != op_Store)
                return 0;

        block = get_nodes_block(store);

        /* abort on dead blocks */
        if (is_Block_dead(block))
                return 0;

        /* check if the block is post dominated by Phi-block
           and has no exception exit */
        bl_info = get_irn_link(block);
        if (bl_info->flags & BLOCK_HAS_EXC)
                return 0;

        phi_block = get_nodes_block(phi);
        if (! block_postdominates(phi_block, block))
                return 0;

        /* this is the address of the store */
        ptr  = get_Store_ptr(store);
        mode = get_irn_mode(get_Store_value(store));
        info = get_irn_link(store);
        exc  = info->exc_block;

        for (i = 1; i < n; ++i) {
                ir_node *pred = skip_Proj(get_Phi_pred(phi, i));

                if (get_irn_op(pred) != op_Store)
                        return 0;

                if (ptr != get_Store_ptr(pred) || mode != get_irn_mode(get_Store_value(pred)))
                        return 0;

                info = get_irn_link(pred);

                /* check, if all stores have the same exception flow */
                if (exc != info->exc_block)
                        return 0;

                /* abort on dead blocks */
                block = get_nodes_block(pred);
                if (is_Block_dead(block))
                        return 0;

                /* check if the block is post dominated by Phi-block
                   and has no exception exit. Note that block must be different from
                   Phi-block, else we would move a Store from the end of a block to its
                   start... */
                bl_info = get_irn_link(block);
                if (bl_info->flags & BLOCK_HAS_EXC)
                        return 0;
                if (block == phi_block || ! block_postdominates(phi_block, block))
                        return 0;
        }

        /*
         * ok, when we are here, we found all predecessors of a Phi that
         * are Stores to the same address and size. That means whatever
         * we do before we enter the block of the Phi, we do a Store.
         * So, we can move the Store to the current block:
         *
         *   val1    val2    val3          val1  val2  val3
         *    |       |       |               \    |    /
         * | Str | | Str | | Str |             \   |   /
         *      \     |     /                   PhiData
         *       \    |    /                       |
         *        \   |   /                       Str
         *           PhiM
         *
         * Is only allowed if the predecessor blocks have only one successor.
         */

        NEW_ARR_A(ir_node *, stores, n);
        NEW_ARR_A(ir_node *, inM, n);
        NEW_ARR_A(ir_node *, inD, n);
        NEW_ARR_A(int, idx, n);

        /* Prepare: Collect all Store nodes.  We must do this
           first because we otherwise may lose a store when exchanging its
           memory Proj.
         */
        for (i = 0; i < n; ++i)
                stores[i] = skip_Proj(get_Phi_pred(phi, i));

        /* Prepare: Skip the memory Proj: we need this in the case some stores
           are cascaded.
           Beware: One Store might be included more than once in the stores[]
           list, so we must avoid doing the exchange more than once.
         */
        for (i = 0; i < n; ++i) {
                ir_node *store = stores[i];
                ir_node *proj_m;

                info = get_irn_link(store);
                proj_m = info->projs[pn_Store_M];

                if (is_Proj(proj_m) && get_Proj_pred(proj_m) == store)
                        exchange(proj_m, get_Store_mem(store));
        }

        /* first step: collect all inputs */
        for (i = 0; i < n; ++i) {
                ir_node *store = stores[i];
                info = get_irn_link(store);

                inM[i] = get_Store_mem(store);
                inD[i] = get_Store_value(store);
                idx[i] = info->exc_idx;
        }
        block = get_nodes_block(phi);

        /* second step: create a new memory Phi */
        phiM = new_rd_Phi(get_irn_dbg_info(phi), current_ir_graph, block, n, inM, mode_M);

        /* third step: create a new data Phi */
        phiD = new_rd_Phi(get_irn_dbg_info(phi), current_ir_graph, block, n, inD, mode);

        /* fourth step: create the Store */
        store = new_rd_Store(db, current_ir_graph, block, phiM, ptr, phiD);
#ifdef DO_CACHEOPT
        co_set_irn_name(store, co_get_irn_ident(old_store));
#endif

        projM = new_rd_Proj(NULL, current_ir_graph, block, store, mode_M, pn_Store_M);

        info = get_ldst_info(store, wenv);
        info->projs[pn_Store_M] = projM;

        /* fifth step: repair exception flow */
        if (exc) {
                ir_node *projX = new_rd_Proj(NULL, current_ir_graph, block, store, mode_X, pn_Store_X_except);

                info->projs[pn_Store_X_except] = projX;
                info->exc_block                = exc;
                info->exc_idx                  = idx[0];

                for (i = 0; i < n; ++i) {
                        set_Block_cfgpred(exc, idx[i], projX);
                }

                if (n > 1) {
                        /* the exception block should be optimized as some inputs are identical now */
                }

                res |= CF_CHANGED;
        }

        /* sixth step: replace old Phi */
        exchange(phi, projM);

        return res | DF_CHANGED;
}  /* optimize_phi */
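
/*
 * Editor's note (a sketch, not part of the original source): at the C level
 * this turns branch-local stores to one address into a single selected store:
 *
 *     if (c) *p = a; else *p = b;    =>    *p = c ? a : b;
 *
 * The data Phi selects the value, the memory Phi merges the incoming memory
 * states, and the single new Store lands in the Phi's block.
 */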

/**
 * walker, do the optimizations
 */
static void do_load_store_optimize(ir_node *n, void *env) {
        walk_env_t *wenv = env;

        switch (get_irn_opcode(n)) {

        case iro_Load:
                wenv->changes |= optimize_load(n);
                break;

        case iro_Store:
                wenv->changes |= optimize_store(n);
                break;

        case iro_Phi:
                wenv->changes |= optimize_phi(n, wenv);
                break;

        default:
                ;
        }
}  /* do_load_store_optimize */

/*
 * do the load/store optimization
 */
void optimize_load_store(ir_graph *irg) {
        walk_env_t env;

        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) != op_pin_state_floats &&
                "LoadStore optimization needs pinned graph");

        if (! get_opt_redundant_loadstore())
                return;

        edges_assure(irg);

        /* for Phi optimization post-dominators are needed ... */
        assure_postdoms(irg);

        if (get_opt_alias_analysis()) {
                assure_irg_address_taken_computed(irg);
                assure_irp_globals_address_taken_computed();
        }

        obstack_init(&env.obst);
        env.changes = 0;

        /* init the links, then collect Loads/Stores/Proj's in lists */
        master_visited = 0;
        irg_walk_graph(irg, firm_clear_link, collect_nodes, &env);

        /* now we have collected enough information, optimize */
        irg_walk_graph(irg, NULL, do_load_store_optimize, &env);

        obstack_free(&env.obst, NULL);

        /* Handle graph state */
        if (env.changes) {
                if (get_irg_outs_state(irg) == outs_consistent)
                        set_irg_outs_inconsistent(irg);
        }

        if (env.changes & CF_CHANGED) {
                /* is this really needed? Yes: control flow changed, blocks might
                   have Bad() predecessors. */
                set_irg_doms_inconsistent(irg);
        }
}  /* optimize_load_store */
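
/*
 * Editor's note (a sketch, not part of the original source): a typical
 * driver would run this pass after construction and local optimizations,
 * e.g.
 *
 *     ir_graph *irg = ...;
 *     optimize_graph_df(irg);       // local data-flow optimizations
 *     optimize_load_store(irg);     // this pass
 *     dead_node_elimination(irg);   // sweep the Bad() leftovers
 *
 * The pass itself asserts a pinned, fully built graph and recomputes edges
 * and post-dominators as needed.
 */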