2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Dataflow driven Load/Store optimizations, uses some ideas from
24 * @author Michael Beck
40 #include "iroptimize.h"
41 #include "irnodemap.h"
42 #include "raw_bitset.h"
47 /* maximum number of output Proj's */
48 #define MAX_PROJ (pn_Load_max > pn_Store_max ? pn_Load_max : pn_Store_max)
51 * Mapping an address to a dense ID.
53 typedef struct address_entry_t {
54 unsigned id; /**< The ID */
61 FLAG_KILL_ALL = 1, /**< KILL all addresses */
62 FLAG_KILLED_NODE = 2, /**< this node was killed */
63 FLAG_EXCEPTION = 4, /**< this node has exception flow */
64 FLAG_IGNORE = 8, /**< ignore this node (volatile or other) */
68 * A value: This represents a value stored at a given address in
69 * memory. Do not confuse with values from value numbering.
71 typedef struct value_t value_t;
73 ir_node *address; /**< the address of this value */
74 ir_node *value; /**< the value itself */
75 ir_mode *mode; /**< the mode of the value */
76 unsigned id; /**< address id */
80 * A memop describes a memory-related operation.
81 * These are Loads/Stores and all other ops that might modify
82 * memory (Calls, CopyB) or cause exceptions.
84 typedef struct memop_t memop_t;
86 value_t value; /**< the value of this memop: only defined for Load/Store */
87 ir_node *node; /**< the memory op itself */
88 ir_node *mem; /**< the memory FROM this node */
89 ir_node *replace; /**< the replacement node if this memop is replaced */
90 memop_t *next; /**< links to the next memory op in the block in forward order. */
91 memop_t *prev; /**< links to the previous memory op in the block in forward order. */
92 unsigned flags; /**< memop flags */
93 ir_node *projs[MAX_PROJ]; /**< Projs of this memory op */
97 * Additional data for every basic block.
99 typedef struct block_t block_t;
101 memop_t *memop_forward; /**< topologically sorted list of memory ops in this block */
102 memop_t *memop_backward; /**< last memop in the list */
103 unsigned *avail_out; /**< out-set of available addresses */
104 memop_t **id_2_memop_avail; /**< maps avail address ids to memops */
105 unsigned *anticL_in; /**< in-set of anticipated Load addresses */
106 memop_t **id_2_memop_antic; /**< maps anticipated address ids to memops */
107 ir_node *block; /**< the associated block */
108 block_t *forward_next; /**< next block entry for forward iteration */
109 block_t *backward_next; /**< next block entry for backward iteration */
110 memop_t *avail; /**< used locally for the avail map */
111 memop_t **trans_results; /**< used to cache translated nodes during antic calculation. */
115 * Metadata for this pass.
117 typedef struct ldst_env_t {
118 struct obstack obst; /**< obstack for temporary data */
119 ir_nodemap_t adr_map; /**< Maps addresses to address entries */
120 block_t *forward; /**< Inverse post-order list of all blocks Start->End */
121 block_t *backward; /**< Inverse post-order list of all blocks End->Start */
122 ir_node *start_bl; /**< start block of the current graph */
123 ir_node *end_bl; /**< end block of the current graph */
124 unsigned *curr_set; /**< current set of addresses */
125 memop_t **curr_id_2_memop; /**< current map of address ids to memops */
126 unsigned curr_adr_id; /**< number for address mapping */
127 unsigned n_mem_ops; /**< number of memory operations (Loads/Stores) */
128 unsigned rbs_size; /**< size of all bitsets in bits */
129 int max_cfg_preds; /**< maximum number of block cfg predecessors */
130 int changed; /**< Flags for changed graph state */
132 ir_node **id_2_address; /**< maps an id to the used address */
136 /* the one and only environment */
141 static firm_dbg_module_t *dbg;
144 * Dumps the block list.
146 * @param env the ldst environment
148 static void dump_block_list(ldst_env *env) {
153 for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
154 DB((dbg, LEVEL_2, "%+F {", entry->block));
157 for (op = entry->memop_forward; op != NULL; op = op->next) {
159 DB((dbg, LEVEL_2, "\n\t"));
160 }
DB((dbg, LEVEL_2, "%+F", op->node));
161 if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
162 DB((dbg, LEVEL_2, "X"));
163 else if (op->flags & FLAG_KILL_ALL)
164 DB((dbg, LEVEL_2, "K"));
165 DB((dbg, LEVEL_2, ", "));
169 DB((dbg, LEVEL_2, "\n}\n\n"));
171 } /* dump_block_list */
174 * Dumps the current set.
176 * @param bl current block
177 * @param s name of the set
179 static void dump_curr(block_t *bl, const char *s) {
180 unsigned end = env.rbs_size - 1;
184 DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
186 for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
187 memop_t *op = env.curr_id_2_memop[pos];
190 DB((dbg, LEVEL_2, "\n\t"));
193 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
196 DB((dbg, LEVEL_2, "\n}\n"));
200 static void dump_block_list(ldst_env *env) {
203 static void dump_curr(block_t *bl, const char *s) {
207 #endif /* DEBUG_libfirm */
209 /** Get the block entry for a block node */
210 static block_t *get_block_entry(const ir_node *block) {
211 assert(is_Block(block));
213 return get_irn_link(block);
214 } /* get_block_entry */
216 /** Get the memop entry for a memory operation node */
217 static memop_t *get_irn_memop(const ir_node *irn) {
218 assert(! is_Block(irn));
219 return get_irn_link(irn);
220 } /* get_irn_memop */
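/*
 * Note: both getters rely on the generic link field: prepare_blocks() attaches
 * a block_t to every Block node and alloc_memop() attaches a memop_t to every
 * memory operation, so they must only be called after those walkers ran.
 */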
223 * Walk over the memory edges from definition to users.
224 * This ensures that even operations without a memory output are found.
226 * @param irn start node
227 * @param pre pre walker function
228 * @param post post walker function
229 * @param ctx context parameter for the walker functions
231 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
235 mark_irn_visited(irn);
240 mode = get_irn_mode(irn);
241 if (mode == mode_M) {
242 /* every successor uses memory */
243 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
244 ir_node *succ = get_irn_out(irn, i);
246 if (! irn_visited(succ))
247 walk_memory(succ, pre, post, ctx);
249 } else if (mode == mode_T) {
250 /* only some Proj's use memory */
251 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
252 ir_node *proj = get_irn_out(irn, i);
254 if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
255 walk_memory(proj, pre, post, ctx);
263 * Walks over all memory nodes of a graph.
266 * @param pre pre walker function
267 * @param post post walker function
268 * @param ctx context parameter for the walker functions
270 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
271 inc_irg_visited(irg);
273 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
276 * there are two possible sources for memory: initial_mem and nomem
277 * we ignore nomem as this should NOT change the memory
279 walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
281 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
282 } /* walk_memory_irg */
285 * Register an address and allocate a dense (0..n) ID for it.
287 * @param adr the IR-node representing the address
289 * @return the allocated id
291 static unsigned register_address(ir_node *adr) {
292 address_entry *entry;
294 /* skip Confirms and Casts */
296 if (is_Confirm(adr)) {
297 adr = get_Confirm_value(adr);
301 adr = get_Cast_op(adr);
305 entry = ir_nodemap_get(&env.adr_map, adr);
309 entry = obstack_alloc(&env.obst, sizeof(*entry));
311 entry->id = env.curr_adr_id++;
312 ir_nodemap_insert(&env.adr_map, adr, entry);
314 DB((dbg, LEVEL_3, "ADDRESS %+F has ID %u\n", adr, entry->id));
316 ARR_APP1(ir_node *, env.id_2_address, adr);
320 } /* register_address */
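/*
 * Sketch of the ID scheme: the first distinct address node gets ID 0, the next
 * one ID 1, and so on. These IDs index the rbitsets and the id_2_memop maps;
 * the highest bit (env.rbs_size - 1) is reserved as a sentinel and is never
 * assigned to a real address.
 */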
324 * Translate an address through a Phi node into a given predecessor block.
327 * @param address the address
328 * @param block the block
329 * @param pos the position of the predecessor in block
331 static ir_node *phi_translate(ir_node *address, const ir_node *block, int pos) {
332 if (is_Phi(address) && get_nodes_block(address) == block)
333 address = get_Phi_pred(address, pos);
335 } /* phi_translate */
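/*
 * Example: if the address is a Phi(a0, a1) created in the block itself, then
 * seen from predecessor 0 the address is simply a0; any other address is
 * returned unchanged.
 */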
338 * Walker: allocate a block entry for every block
339 * and register all potential addresses.
341 static void prepare_blocks(ir_node *irn, void *ctx) {
345 block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
348 entry->memop_forward = NULL;
349 entry->memop_backward = NULL;
350 entry->avail_out = NULL;
351 entry->id_2_memop_avail = NULL;
352 entry->anticL_in = NULL;
353 entry->id_2_memop_antic = NULL;
355 entry->forward_next = NULL;
356 entry->backward_next = NULL;
358 entry->trans_results = NULL;
359 set_irn_link(irn, entry);
361 set_Block_phis(irn, NULL);
363 /* use block marks to track unreachable blocks */
364 set_Block_mark(irn, 0);
366 n = get_Block_n_cfgpreds(irn);
367 if (n > env.max_cfg_preds)
368 env.max_cfg_preds = n;
370 ir_mode *mode = get_irn_mode(irn);
372 if (mode_is_reference(mode)) {
374 * Register ALL possible addresses: this is overkill yet, but
375 * simpler than doing it for all possible translated addresses
376 * (which would be sufficient at the moment).
378 (void)register_address(irn);
381 } /* prepare_blocks */
384 * Post-Walker, link in all Phi's
386 static void link_phis(ir_node *irn, void *ctx) {
390 ir_node *block = get_nodes_block(irn);
391 add_Block_phi(block, irn);
396 * Block walker: creates the inverse post-order list for the CFG.
398 static void inverse_post_order(ir_node *block, void *ctx) {
399 block_t *entry = get_block_entry(block);
403 /* mark this block IS reachable from start */
404 set_Block_mark(block, 1);
406 /* create the list in inverse order */
407 entry->forward_next = env.forward;
410 /* remember the first visited (last in list) entry, needed for later */
411 if (env.backward == NULL)
412 env.backward = entry;
413 } /* inverse_post_order */
416 * Block walker: create backward links for the memops of a block.
418 static void collect_backward(ir_node *block, void *ctx) {
419 block_t *entry = get_block_entry(block);
425 * Do NOT link in the end block yet. We want it to be
426 * the first in the list. This is NOT guaranteed by the walker
427 * if we have endless loops.
429 if (block != env.end_bl) {
430 entry->backward_next = env.backward;
432 /* create the list in inverse order */
433 env.backward = entry;
436 /* create backward links for all memory ops */
438 for (op = entry->memop_forward; op != NULL; op = op->next) {
442 entry->memop_backward = last;
443 } /* collect_backward */
448 * @param irn the IR-node representing the memop or NULL
449 * if this is a translated (virtual) memop
451 * @return the allocated memop
453 static memop_t *alloc_memop(ir_node *irn) {
454 memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
456 m->value.address = NULL;
457 m->value.value = NULL;
458 m->value.mode = NULL;
466 memset(m->projs, 0, sizeof(m->projs));
469 set_irn_link(irn, m);
474 * Create a memop for a Phi-replacement.
476 * @param op the memop to clone
477 * @param phi the Phi-node representing the new value
479 static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
480 memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
482 m->value = op->value;
483 m->value.value = phi;
490 set_irn_link(phi, m);
492 } /* clone_memop_phi */
495 * Return the memory properties of a call node.
497 * @param call the call node
499 * @return a bitset of mtp_property_const and mtp_property_pure
501 static unsigned get_Call_memory_properties(ir_node *call) {
502 ir_type *call_tp = get_Call_type(call);
503 unsigned prop = get_method_additional_properties(call_tp);
505 /* first check the call type */
506 if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
507 /* try the called entity */
508 ir_node *ptr = get_Call_ptr(call);
510 if (is_Global(ptr)) {
511 ir_entity *ent = get_Global_entity(ptr);
513 prop = get_entity_additional_properties(ent);
516 return prop & (mtp_property_const|mtp_property_pure);
517 } /* get_Call_memory_properties */
520 * Returns an entity if the address ptr points to a constant one.
522 * @param ptr the address
524 * @return an entity or NULL
526 static ir_entity *find_constant_entity(ir_node *ptr) {
528 if (is_SymConst(ptr) && get_SymConst_kind(ptr) == symconst_addr_ent) {
529 return get_SymConst_entity(ptr);
530 } else if (is_Sel(ptr)) {
531 ir_entity *ent = get_Sel_entity(ptr);
532 ir_type *tp = get_entity_owner(ent);
534 /* Do not fiddle with polymorphism. */
535 if (is_Class_type(get_entity_owner(ent)) &&
536 ((get_entity_n_overwrites(ent) != 0) ||
537 (get_entity_n_overwrittenby(ent) != 0) ) )
540 if (is_Array_type(tp)) {
544 for (i = 0, n = get_Sel_n_indexs(ptr); i < n; ++i) {
546 tarval *tlower, *tupper;
547 ir_node *index = get_Sel_index(ptr, i);
548 tarval *tv = computed_value(index);
550 /* check if the index is constant */
551 if (tv == tarval_bad)
554 bound = get_array_lower_bound(tp, i);
555 tlower = computed_value(bound);
556 bound = get_array_upper_bound(tp, i);
557 tupper = computed_value(bound);
559 if (tlower == tarval_bad || tupper == tarval_bad)
562 if (tarval_cmp(tv, tlower) & pn_Cmp_Lt)
564 if (tarval_cmp(tupper, tv) & pn_Cmp_Lt)
567 /* ok, bounds check finished */
571 if (variability_constant == get_entity_variability(ent))
575 ptr = get_Sel_ptr(ptr);
576 } else if (is_Add(ptr)) {
577 ir_node *l = get_Add_left(ptr);
578 ir_node *r = get_Add_right(ptr);
580 if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
582 else if (get_irn_mode(r) == get_irn_mode(ptr) && is_Const(l))
587 /* for now, we support only one addition, reassoc should fold all others */
588 if (! is_SymConst(ptr) && !is_Sel(ptr))
590 } else if (is_Sub(ptr)) {
591 ir_node *l = get_Sub_left(ptr);
592 ir_node *r = get_Sub_right(ptr);
594 if (get_irn_mode(l) == get_irn_mode(ptr) && is_Const(r))
598 /* for now, we support only one subtraction, reassoc should fold all others */
599 if (! is_SymConst(ptr) && !is_Sel(ptr))
604 } /* find_constant_entity */
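/*
 * Rough example: for an address that selects into a constant-initialized
 * global (possibly through a chain of Sels with in-bounds constant indexes,
 * plus at most one constant Add/Sub offset) the constant entity is returned;
 * a non-constant index, a failed bounds check or polymorphic entities make
 * the function bail out with NULL.
 */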
607 * Return the Selection index of a Sel node for dimension dim.
609 static long get_Sel_array_index_long(ir_node *n, int dim) {
610 ir_node *index = get_Sel_index(n, dim);
611 assert(is_Const(index));
612 return get_tarval_long(get_Const_tarval(index));
613 } /* get_Sel_array_index_long */
616 * Returns the accessed component graph path for a
617 * node computing an address.
619 * @param ptr the node computing the address
620 * @param depth current depth in steps upward from the root
623 static compound_graph_path *rec_get_accessed_path(ir_node *ptr, int depth) {
624 compound_graph_path *res = NULL;
625 ir_entity *root, *field, *ent;
626 int path_len, pos, idx;
630 if (is_SymConst(ptr)) {
631 /* a SymConst. If the depth is 0, this is an access to a global
632 * entity and we don't need a component path, else we know
633 * at least its length.
635 assert(get_SymConst_kind(ptr) == symconst_addr_ent);
636 root = get_SymConst_entity(ptr);
637 res = (depth == 0) ? NULL : new_compound_graph_path(get_entity_type(root), depth);
638 } else if (is_Sel(ptr)) {
639 /* it's a Sel, go up until we find the root */
640 res = rec_get_accessed_path(get_Sel_ptr(ptr), depth+1);
644 /* fill up the step in the path at the current position */
645 field = get_Sel_entity(ptr);
646 path_len = get_compound_graph_path_length(res);
647 pos = path_len - depth - 1;
648 set_compound_graph_path_node(res, pos, field);
650 if (is_Array_type(get_entity_owner(field))) {
651 assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
652 set_compound_graph_path_array_index(res, pos, get_Sel_array_index_long(ptr, 0));
654 } else if (is_Add(ptr)) {
655 ir_node *l = get_Add_left(ptr);
656 ir_node *r = get_Add_right(ptr);
657 ir_mode *mode = get_irn_mode(ptr);
660 if (is_Const(r) && get_irn_mode(l) == mode) {
662 tv = get_Const_tarval(r);
665 tv = get_Const_tarval(l);
668 mode = get_tarval_mode(tv);
671 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
673 field = get_Sel_entity(ptr);
675 field = get_SymConst_entity(ptr);
678 for (ent = field;;) {
680 tarval *sz, *tv_index, *tlower, *tupper;
683 tp = get_entity_type(ent);
684 if (! is_Array_type(tp))
686 ent = get_array_element_entity(tp);
687 size = get_type_size_bytes(get_entity_type(ent));
688 sz = new_tarval_from_long(size, mode);
690 tv_index = tarval_div(tmp, sz);
691 tmp = tarval_mod(tmp, sz);
693 if (tv_index == tarval_bad || tmp == tarval_bad)
696 assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
697 bound = get_array_lower_bound(tp, 0);
698 tlower = computed_value(bound);
699 bound = get_array_upper_bound(tp, 0);
700 tupper = computed_value(bound);
702 if (tlower == tarval_bad || tupper == tarval_bad)
705 if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
707 if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
710 /* ok, bounds check finished */
713 if (! tarval_is_null(tmp)) {
714 /* access to some struct/union member */
718 /* should be at least ONE array */
722 res = rec_get_accessed_path(ptr, depth + idx);
726 path_len = get_compound_graph_path_length(res);
727 pos = path_len - depth - idx;
729 for (ent = field;;) {
731 tarval *sz, *tv_index;
734 tp = get_entity_type(ent);
735 if (! is_Array_type(tp))
737 ent = get_array_element_entity(tp);
738 set_compound_graph_path_node(res, pos, ent);
740 size = get_type_size_bytes(get_entity_type(ent));
741 sz = new_tarval_from_long(size, mode);
743 tv_index = tarval_div(tv, sz);
744 tv = tarval_mod(tv, sz);
746 /* worked above, should work again */
747 assert(tv_index != tarval_bad && tv != tarval_bad);
749 /* bounds already checked above */
750 index = get_tarval_long(tv_index);
751 set_compound_graph_path_array_index(res, pos, index);
754 } else if (is_Sub(ptr)) {
755 ir_node *l = get_Sub_left(ptr);
756 ir_node *r = get_Sub_right(ptr);
759 tv = get_Const_tarval(r);
764 } /* rec_get_accessed_path */
767 * Returns an access path or NULL. The access path is only
768 * valid if the graph is in phase_high and _no_ address computation is used.
770 static compound_graph_path *get_accessed_path(ir_node *ptr) {
771 compound_graph_path *gr = rec_get_accessed_path(ptr, 0);
773 } /* get_accessed_path */
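/*
 * Rough example: for ptr = Sel(Sel(SymConst(&glob), member_a), index 3) the
 * recursion yields the compound graph path glob.a[3], which is later used to
 * look up the statically initialized value of that member.
 */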
775 typedef struct path_entry {
777 struct path_entry *next;
781 static ir_node *rec_find_compound_ent_value(ir_node *ptr, path_entry *next) {
782 path_entry entry, *p;
783 ir_entity *ent, *field;
784 ir_initializer_t *initializer;
790 if (is_SymConst(ptr)) {
792 ent = get_SymConst_entity(ptr);
793 initializer = get_entity_initializer(ent);
794 for (p = next; p != NULL;) {
795 if (initializer->kind != IR_INITIALIZER_COMPOUND)
797 n = get_initializer_compound_n_entries(initializer);
798 tp = get_entity_type(ent);
800 if (is_Array_type(tp)) {
801 ent = get_array_element_entity(tp);
806 initializer = get_initializer_compound_value(initializer, 0);
810 if (p->index >= (int) n)
812 initializer = get_initializer_compound_value(initializer, p->index);
817 tp = get_entity_type(ent);
818 while (is_Array_type(tp)) {
819 ent = get_array_element_entity(tp);
820 tp = get_entity_type(ent);
822 n = get_initializer_compound_n_entries(initializer);
825 initializer = get_initializer_compound_value(initializer, 0);
828 switch (initializer->kind) {
829 case IR_INITIALIZER_CONST:
830 return get_initializer_const_value(initializer);
831 case IR_INITIALIZER_TARVAL:
832 case IR_INITIALIZER_NULL:
836 } else if (is_Sel(ptr)) {
837 entry.ent = field = get_Sel_entity(ptr);
838 tp = get_entity_owner(field);
839 if (is_Array_type(tp)) {
840 assert(get_Sel_n_indexs(ptr) == 1 && "multi dim arrays not implemented");
841 entry.index = get_Sel_array_index_long(ptr, 0) - get_array_lower_bound_int(tp, 0);
843 int i, n_members = get_compound_n_members(tp);
844 for (i = 0; i < n_members; ++i) {
845 if (get_compound_member(tp, i) == field)
848 if (i >= n_members) {
849 /* not found: should NOT happen */
854 return rec_find_compound_ent_value(get_Sel_ptr(ptr), &entry);
855 } else if (is_Add(ptr)) {
856 ir_node *l = get_Add_left(ptr);
857 ir_node *r = get_Add_right(ptr);
863 tv = get_Const_tarval(r);
866 tv = get_Const_tarval(l);
869 mode = get_tarval_mode(tv);
871 /* ptr must be a Sel or a SymConst, this was checked in find_constant_entity() */
873 field = get_Sel_entity(ptr);
875 field = get_SymConst_entity(ptr);
878 /* count needed entries */
880 for (ent = field;;) {
881 tp = get_entity_type(ent);
882 if (! is_Array_type(tp))
884 ent = get_array_element_entity(tp);
887 /* should be at least ONE entry */
891 /* allocate the right number of entries */
892 NEW_ARR_A(path_entry, p, pos);
896 for (ent = field;;) {
898 tarval *sz, *tv_index, *tlower, *tupper;
902 tp = get_entity_type(ent);
903 if (! is_Array_type(tp))
905 ent = get_array_element_entity(tp);
907 p[pos].next = &p[pos + 1];
909 size = get_type_size_bytes(get_entity_type(ent));
910 sz = new_tarval_from_long(size, mode);
912 tv_index = tarval_div(tv, sz);
913 tv = tarval_mod(tv, sz);
915 if (tv_index == tarval_bad || tv == tarval_bad)
918 assert(get_array_n_dimensions(tp) == 1 && "multiarrays not implemented");
919 bound = get_array_lower_bound(tp, 0);
920 tlower = computed_value(bound);
921 bound = get_array_upper_bound(tp, 0);
922 tupper = computed_value(bound);
924 if (tlower == tarval_bad || tupper == tarval_bad)
927 if (tarval_cmp(tv_index, tlower) & pn_Cmp_Lt)
929 if (tarval_cmp(tupper, tv_index) & pn_Cmp_Lt)
932 /* ok, bounds check finished */
933 index = get_tarval_long(tv_index);
934 p[pos].index = index;
937 if (! tarval_is_null(tv)) {
938 /* hmm, wrong access */
941 p[pos - 1].next = next;
942 return rec_find_compound_ent_value(ptr, p);
943 } else if (is_Sub(ptr)) {
944 ir_node *l = get_Sub_left(ptr);
945 ir_node *r = get_Sub_right(ptr);
948 tv = get_Const_tarval(r);
953 } /* rec_find_compound_ent_value */
955 static ir_node *find_compound_ent_value(ir_node *ptr) {
956 return rec_find_compound_ent_value(ptr, NULL);
957 } /* find_compound_ent_value */
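/*
 * This is the counterpart of get_accessed_path() for entities that use the
 * new style ir_initializer_t: the path_entry chain built above is walked
 * through the nested compound initializers until a single constant value
 * (or NULL) is found.
 */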
960 * Mark a Load memop to be replaced by a definition
962 * @param op the Load memop
964 static void mark_replace_load(memop_t *op, ir_node *def) {
966 op->flags |= FLAG_KILLED_NODE;
968 } /* mark_replace_load */
971 * Mark a Store memop to be removed.
973 * @param op the Store memop
975 static void mark_remove_store(memop_t *op) {
976 op->flags |= FLAG_KILLED_NODE;
978 } /* mark_remove_store */
981 * Update a memop for a Load.
985 static void update_Load_memop(memop_t *m) {
987 ir_node *load = m->node;
991 if (get_Load_volatility(load) == volatility_is_volatile)
992 m->flags |= FLAG_IGNORE;
994 ptr = get_Load_ptr(load);
996 m->value.address = ptr;
998 for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
999 ir_node *proj = get_irn_out(load, i);
1002 /* beware of keep edges */
1006 pn = get_Proj_proj(proj);
1007 m->projs[pn] = proj;
1010 m->value.value = proj;
1011 m->value.mode = get_irn_mode(proj);
1013 case pn_Load_X_except:
1014 m->flags |= FLAG_EXCEPTION;
1019 case pn_Load_X_regular:
1022 panic("Unsupported Proj from Load %+F", proj);
1026 /* check if we can determine the entity that will be loaded */
1027 ent = find_constant_entity(ptr);
1030 allocation_static == get_entity_allocation(ent) &&
1031 visibility_external_allocated != get_entity_visibility(ent)) {
1032 /* a static allocation that is not external: there should be NO exception
1033 * when loading even if we cannot replace the load itself. */
1034 ir_node *value = NULL;
1036 /* no exception: clear the m fields as they might be checked again later */
1037 if (m->projs[pn_Load_X_except]) {
1038 exchange(m->projs[pn_Load_X_except], new_Bad());
1039 m->projs[pn_Load_X_except] = NULL;
1040 m->flags &= ~FLAG_EXCEPTION;
1043 if (m->projs[pn_Load_X_regular]) {
1044 exchange(m->projs[pn_Load_X_regular], new_r_Jmp(get_nodes_block(load)));
1045 m->projs[pn_Load_X_regular] = NULL;
1049 if (variability_constant == get_entity_variability(ent)) {
1050 if (is_atomic_entity(ent)) {
1051 /* Might not be atomic after lowering of Sels. In this case we
1052 * could also load, but it's more complicated. */
1053 /* simpler case: we load the content of a constant value:
1054 * replace it by the constant itself */
1055 value = get_atomic_ent_value(ent);
1056 } else if (ent->has_initializer) {
1057 /* new style initializer */
1058 value = find_compound_ent_value(ptr);
1060 /* old style initializer */
1061 compound_graph_path *path = get_accessed_path(ptr);
1064 assert(is_proper_compound_graph_path(path, get_compound_graph_path_length(path)-1));
1066 value = get_compound_ent_value_by_path(ent, path);
1067 DB((dbg, LEVEL_1, " Constant access at %F%F resulted in %+F\n", ent, path, value));
1068 free_compound_graph_path(path);
1072 value = can_replace_load_by_const(load, value);
1075 if (value != NULL) {
1076 /* we completely replace the load by this value */
1077 DB((dbg, LEVEL_1, "Replacing Load %+F by constant %+F\n", m->node, value));
1078 mark_replace_load(m, value);
1083 if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
1084 /* only create an address if this node is NOT killed immediately or ignored */
1085 m->value.id = register_address(ptr);
1088 /* no user, KILL it */
1089 mark_replace_load(m, NULL);
1091 } /* update_Load_memop */
1094 * Update a memop for a Store.
1096 * @param m the memop
1098 static void update_Store_memop(memop_t *m) {
1100 ir_node *store = m->node;
1101 ir_node *adr = get_Store_ptr(store);
1103 if (get_Store_volatility(store) == volatility_is_volatile) {
1104 m->flags |= FLAG_IGNORE;
1106 /* only create an address if this node is NOT ignored */
1107 m->value.id = register_address(adr);
1111 m->value.address = adr;
1113 for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1114 ir_node *proj = get_irn_out(store, i);
1117 /* beware of keep edges */
1121 pn = get_Proj_proj(proj);
1122 m->projs[pn] = proj;
1124 case pn_Store_X_except:
1125 m->flags |= FLAG_EXCEPTION;
1130 case pn_Store_X_regular:
1133 panic("Unsupported Proj from Store %+F", proj);
1136 m->value.value = get_Store_value(store);
1137 m->value.mode = get_irn_mode(m->value.value);
1138 } /* update_Store_memop */
1141 * Update a memop for a Call.
1143 * @param m the memop
1145 static void update_Call_memop(memop_t *m) {
1146 ir_node *call = m->node;
1147 unsigned prop = get_Call_memory_properties(call);
1150 if (prop & mtp_property_const) {
1151 /* A constant call does NOT use memory at all, so we
1152 can kick it from the list. */
1153 } else if (prop & mtp_property_pure) {
1154 /* pure calls READ memory */
1157 m->flags = FLAG_KILL_ALL;
1159 for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
1160 ir_node *proj = get_irn_out(call, i);
1162 /* beware of keep edges */
1166 switch (get_Proj_proj(proj)) {
1167 case pn_Call_X_except:
1168 m->flags |= FLAG_EXCEPTION;
1170 case pn_Call_M_regular:
1175 } /* update_Call_memop */
1178 * Update a memop for a Div/Mod/Quot/DivMod.
1180 * @param m the memop
1182 static void update_DivOp_memop(memop_t *m) {
1183 ir_node *div = m->node;
1186 for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
1187 ir_node *proj = get_irn_out(div, i);
1189 /* beware of keep edges */
1193 switch (get_Proj_proj(proj)) {
1194 case pn_Generic_X_except:
1195 m->flags |= FLAG_EXCEPTION;
1197 case pn_Generic_M_regular:
1202 } /* update_DivOp_memop */
1205 * Update a memop for a Phi.
1207 * @param m the memop
1209 static void update_Phi_memop(memop_t *m) {
1210 /* the Phi is its own mem */
1212 } /* update_Phi_memop */
1215 * Memory walker: collect all memory ops and build topological lists.
1217 static void collect_memops(ir_node *irn, void *ctx) {
1224 /* we can safely ignore ProjM's except the initial memory */
1225 if (irn != get_irg_initial_mem(current_ir_graph))
1229 op = alloc_memop(irn);
1230 block = get_nodes_block(irn);
1231 entry = get_block_entry(block);
1234 update_Phi_memop(op);
1235 /* Phis must always be placed first */
1236 op->next = entry->memop_forward;
1237 entry->memop_forward = op;
1238 if (entry->memop_backward == NULL)
1239 entry->memop_backward = op;
1241 switch (get_irn_opcode(irn)) {
1243 update_Load_memop(op);
1246 update_Store_memop(op);
1249 update_Call_memop(op);
1256 /* initial memory */
1261 /* we can use those to find the memory edge */
1267 update_DivOp_memop(op);
1271 /* TODO: handle some builtins */
1273 /* unsupported operation */
1274 op->flags = FLAG_KILL_ALL;
1278 /* all others should be placed last */
1279 if (entry->memop_backward == NULL) {
1280 entry->memop_forward = entry->memop_backward = op;
1282 entry->memop_backward->next = op;
1283 entry->memop_backward = op;
1286 } /* collect_memops */
1289 * Find an address in the current set.
1291 * @param value the value to be searched for
1293 * @return a memop for the value or NULL if the value does
1294 * not exist in the set or cannot be converted into
1295 * the requested mode
1297 static memop_t *find_address(const value_t *value) {
1298 if (rbitset_is_set(env.curr_set, value->id)) {
1299 memop_t *res = env.curr_id_2_memop[value->id];
1301 if (res->value.mode == value->mode)
1303 /* allow hidden casts */
1304 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1305 get_mode_arithmetic(value->mode) == irma_twos_complement &&
1306 get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
1310 } /* find_address */
1313 * Find an address in the avail_out set.
1315 * @param bl the block
1317 static memop_t *find_address_avail(const block_t *bl, unsigned id, const ir_mode *mode) {
1318 if (rbitset_is_set(bl->avail_out, id)) {
1319 memop_t *res = bl->id_2_memop_avail[id];
1321 if (res->value.mode == mode)
1323 /* allow hidden casts */
1324 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
1325 get_mode_arithmetic(mode) == irma_twos_complement &&
1326 get_mode_size_bits(res->value.mode) == get_mode_size_bits(mode))
1330 } /* find_address_avail */
1333 * Kill all addresses from the current set.
1335 static void kill_all(void) {
1336 rbitset_clear_all(env.curr_set, env.rbs_size);
1339 rbitset_set(env.curr_set, env.rbs_size - 1);
1343 * Kill all memops from the current set that are not alias-free with respect to a Store value.
1345 * @param value the Store value
1347 static void kill_memops(const value_t *value) {
1348 unsigned end = env.rbs_size - 1;
1351 for (pos = rbitset_next(env.curr_set, 0, 1); pos < end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1352 memop_t *op = env.curr_id_2_memop[pos];
1354 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
1355 op->value.address, op->value.mode)) {
1356 rbitset_clear(env.curr_set, pos);
1357 env.curr_id_2_memop[pos] = NULL;
1358 DB((dbg, LEVEL_2, "KILLING %+F because of possible alias address %+F\n", op->node, value->address));
1364 * Add the value of a memop to the current set.
1366 * @param op the memory op
1368 static void add_memop(memop_t *op) {
1369 rbitset_set(env.curr_set, op->value.id);
1370 env.curr_id_2_memop[op->value.id] = op;
1374 * Add the value of a memop to the avail_out set.
1376 * @param bl the block
1377 * @param op the memory op
1379 static void add_memop_avail(block_t *bl, memop_t *op) {
1380 rbitset_set(bl->avail_out, op->value.id);
1381 bl->id_2_memop_avail[op->value.id] = op;
1382 } /* add_memop_avail */
1385 * Check if we can convert a value of one mode to another mode
1386 * without changing the representation of bits.
1388 * @param from the original mode
1389 * @param to the destination mode
1391 static int can_convert_to(const ir_mode *from, const ir_mode *to) {
1392 if (get_mode_arithmetic(from) == irma_twos_complement &&
1393 get_mode_arithmetic(to) == irma_twos_complement &&
1394 get_mode_size_bits(from) == get_mode_size_bits(to))
1397 } /* can_convert_to */
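/*
 * Example: a 32-bit signed and a 32-bit unsigned integer mode are
 * interchangeable here (same width, both two's complement), while an
 * int <-> float reinterpretation is rejected because the arithmetic differs.
 */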
1400 * Add a Conv to the requested mode if needed.
1402 * @param irn the IR-node to convert
1403 * @param mode the destination mode
1405 * @return the possible converted node or NULL
1406 * if the conversion is not possible
1408 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
1409 ir_mode *other = get_irn_mode(irn);
1410 if (other != mode) {
1411 /* different modes: check if conversion is possible without changing the bits */
1412 if (can_convert_to(other, mode)) {
1413 ir_node *block = get_nodes_block(irn);
1414 return new_r_Conv(block, irn, mode);
1416 /* otherwise not possible ... yet */
1423 * Update the address of a value if this address was a Load result
1424 * and that Load has now been killed.
1426 * @param value the value whose address is updated
1428 static void update_address(value_t *value) {
1429 if (is_Proj(value->address)) {
1430 ir_node *load = get_Proj_pred(value->address);
1432 if (is_Load(load)) {
1433 const memop_t *op = get_irn_memop(load);
1435 if (op->flags & FLAG_KILLED_NODE)
1436 value->address = op->replace;
1439 } /* update_address */
1442 * Do forward dataflow analysis on the given block and calculate the
1443 * GEN and KILL in the current (avail) set.
1445 * @param bl the block
1447 static void calc_gen_kill_avail(block_t *bl) {
1451 for (op = bl->memop_forward; op != NULL; op = op->next) {
1452 switch (get_irn_opcode(op->node)) {
1460 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1461 /* do we have this already? */
1464 update_address(&op->value);
1465 other = find_address(&op->value);
1466 if (other != NULL && other != op) {
1467 def = conv_to(other->value.value, op->value.mode);
1469 #ifdef DEBUG_libfirm
1470 if (is_Store(other->node)) {
1472 DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
1475 DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
1478 mark_replace_load(op, def);
1479 /* do NOT change the memop table */
1483 /* add this value */
1488 if (! (op->flags & FLAG_KILLED_NODE)) {
1489 /* do we have this store already */
1492 update_address(&op->value);
1493 other = find_address(&op->value);
1494 if (other != NULL) {
1495 if (is_Store(other->node)) {
1496 if (op != other && !(other->flags & FLAG_IGNORE) &&
1497 get_nodes_block(other->node) == get_nodes_block(op->node)) {
1499 * For a WAW in the same block we can kick the first Store.
1500 * This is a shortcut: we know that the second Store will be anticipated
1503 DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", other->node, op->node));
1504 mark_remove_store(other);
1505 /* FIXME: a Load might get freed due to this killed Store */
1507 } else if (other->value.value == op->value.value && !(op->flags & FLAG_IGNORE)) {
1509 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1510 mark_remove_store(op);
1511 /* do NOT change the memop table */
1515 /* KILL all possible aliases */
1516 kill_memops(&op->value);
1517 /* add this value */
1522 if (op->flags & FLAG_KILL_ALL)
1526 } /* calc_gen_kill_avail */
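/*
 * Summary of the cases above, roughly:
 *  - a Load whose address is already available (RAW/RAR) is replaced,
 *  - a Store overwriting a still-available Store to the same address (WAW)
 *    kills the older Store; a Store of the value already stored is dropped,
 *  - every surviving Store kills all possibly aliasing entries,
 *  - ops flagged FLAG_KILL_ALL (Calls etc.) clear the whole set.
 * E.g. in "*p = x; y = *p;" the Load is replaced by x, and in
 * "*p = x; *p = y;" (no aliasing access in between) the first Store dies.
 */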
1528 #define BYTE_SIZE(x) (((x) + 7) >> 3)
1531 * Do forward dataflow analysis on a given block to calculate the avail_out set
1532 * for this block only.
1534 * @param block the block
1536 static void forward_avail(block_t *bl) {
1537 /* fill the data from the current block */
1538 env.curr_id_2_memop = bl->id_2_memop_avail;
1539 env.curr_set = bl->avail_out;
1541 calc_gen_kill_avail(bl);
1542 dump_curr(bl, "Avail_out");
1543 } /* forward_avail */
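/*
 * Conceptually this evaluates, per block B (sketch):
 *     Avail_out(B) = GEN(B) + (Avail_in(B) - KILL(B))
 * where Avail_in(B) would be the intersection of the predecessors' Avail_out
 * sets. Only the local GEN/KILL part is computed here; the join over the
 * predecessors happens later in insert_Load().
 */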
1546 * Do backward dataflow analysis on a given block to calculate the antic set
1547 * of Loaded addresses.
1549 * @param bl the block
1551 * @return non-zero if the set has changed since last iteration
1553 static int backward_antic(block_t *bl) {
1555 ir_node *block = bl->block;
1556 int n = get_Block_n_cfg_outs(block);
1559 ir_node *succ = get_Block_cfg_out(block, 0);
1560 block_t *succ_bl = get_block_entry(succ);
1561 int pred_pos = get_Block_cfgpred_pos(succ, block);
1562 unsigned end = env.rbs_size - 1;
1567 if (bl->trans_results == NULL) {
1568 /* allocate the translate cache */
1569 unsigned size = env.curr_adr_id * sizeof(bl->trans_results[0]);
1570 bl->trans_results = obstack_alloc(&env.obst, size);
1571 memset(bl->trans_results, 0, size);
1574 /* check for partly redundant values */
1575 for (pos = rbitset_next(succ_bl->anticL_in, 0, 1);
1577 pos = rbitset_next(succ_bl->anticL_in, pos + 1, 1)) {
1579 * do Phi-translation here: Note that at this point the nodes are
1580 * not changed, so we can safely cache the results.
1581 * However: Loads of Load results ARE bad, because we have no way
1582 to translate them yet ...
1584 memop_t *op = bl->trans_results[pos];
1586 /* not yet translated */
1587 ir_node *adr, *trans_adr;
1589 op = succ_bl->id_2_memop_antic[pos];
1590 adr = op->value.address;
1592 trans_adr = phi_translate(adr, succ, pred_pos);
1593 if (trans_adr != adr) {
1594 /* create a new entry for the translated one */
1597 new_op = alloc_memop(NULL);
1598 new_op->value.address = trans_adr;
1599 new_op->value.id = register_address(trans_adr);
1600 new_op->value.mode = op->value.mode;
1601 new_op->node = op->node; /* we need the node to decide if Load/Store */
1602 new_op->flags = op->flags;
1604 bl->trans_results[pos] = new_op;
1608 env.curr_id_2_memop[op->value.id] = op;
1609 rbitset_set(env.curr_set, op->value.id);
1612 ir_node *succ = get_Block_cfg_out(block, 0);
1613 block_t *succ_bl = get_block_entry(succ);
1616 rbitset_copy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1617 memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1619 /* Hmm: probably we want to kill merges of Loads and Stores here */
1620 for (i = n - 1; i > 0; --i) {
1621 ir_node *succ = get_Block_cfg_out(bl->block, i);
1622 block_t *succ_bl = get_block_entry(succ);
1624 rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1627 /* block ends with a noreturn call */
1631 dump_curr(bl, "AnticL_out");
1633 for (op = bl->memop_backward; op != NULL; op = op->prev) {
1634 switch (get_irn_opcode(op->node)) {
1642 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1648 if (! (op->flags & FLAG_KILLED_NODE)) {
1649 /* a Store: check which memops must be killed */
1650 kill_memops(&op->value);
1654 if (op->flags & FLAG_KILL_ALL)
1659 memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
1660 if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1662 rbitset_copy(bl->anticL_in, env.curr_set, env.rbs_size);
1663 dump_curr(bl, "AnticL_in*");
1666 dump_curr(bl, "AnticL_in");
1668 } /* backward_antic */
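/*
 * This is the usual anticipability fixed point, roughly:
 *     AnticL_out(B) = intersection over all successors S of the
 *                     phi-translated AnticL_in(S)
 *     AnticL_in(B)  = GEN_Load(B) + (AnticL_out(B) - KILL(B))
 * evaluated on the backward block list until no AnticL_in set changes.
 */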
1671 * Replace a Load memop by an already known value.
1673 * @param op the Load memop
1675 static void replace_load(memop_t *op) {
1676 ir_node *load = op->node;
1677 ir_node *def = skip_Id(op->replace);
1682 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1684 if (op->flags & FLAG_EXCEPTION) {
1685 /* bad: this node is unused and executed for exception only */
1686 DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1689 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1692 if (op->mem != NULL) {
1693 /* in rare cases a Load might have NO memory */
1694 exchange(op->mem, get_Load_mem(load));
1696 proj = op->projs[pn_Load_res];
1698 mode = get_irn_mode(proj);
1699 if (get_irn_mode(def) != mode) {
1701 dbg_info *db = get_irn_dbg_info(load);
1702 ir_node *block = get_nodes_block(proj);
1703 def = new_rd_Conv(db, block, def, mode);
1705 exchange(proj, def);
1707 proj = op->projs[pn_Load_X_except];
1709 exchange(proj, new_Bad());
1711 proj = op->projs[pn_Load_X_regular];
1713 exchange(proj, new_r_Jmp(get_nodes_block(load)));
1715 } /* replace_load */
1718 * Remove a Store memop.
1720 * @param op the Store memop
1722 static void remove_store(memop_t *op) {
1723 ir_node *store = op->node;
1726 DB((dbg, LEVEL_1, "Removing %+F\n", store));
1728 if (op->mem != NULL) {
1729 /* in rare cases a Store might have no memory */
1730 exchange(op->mem, get_Store_mem(store));
1732 proj = op->projs[pn_Store_X_except];
1734 exchange(proj, new_Bad());
1736 proj = op->projs[pn_Store_X_regular];
1738 exchange(proj, new_r_Jmp(get_nodes_block(store)));
1740 } /* remove_store */
1744 * Do all necessary replacements for a given block.
1746 * @param bl the block
1748 static void do_replacements(block_t *bl) {
1751 for (op = bl->memop_forward; op != NULL; op = op->next) {
1752 if (op->flags & FLAG_KILLED_NODE) {
1753 switch (get_irn_opcode(op->node)) {
1763 } /* do_replacements */
1766 * Calculate the Avail_out sets for all basic blocks.
1768 static void calcAvail(void) {
1769 memop_t **tmp_memop = env.curr_id_2_memop;
1770 unsigned *tmp_set = env.curr_set;
1773 /* calculate avail_out */
1774 DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1776 /* iterate over all blocks in any order, skip the start block */
1777 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1781 /* restore the current sets */
1782 env.curr_id_2_memop = tmp_memop;
1783 env.curr_set = tmp_set;
1787 * Calculate the Antic_in sets for all basic blocks.
1789 static void calcAntic(void) {
1792 /* calculate antic_in */
1793 DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1798 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1802 /* over all blocks in reverse post order */
1803 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1804 need_iter |= backward_antic(bl);
1807 } while (need_iter);
1808 DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1812 * Return the node representing the last memory in a block.
1814 * @param bl the block
1816 static ir_node *find_last_memory(block_t *bl) {
1818 if (bl->memop_backward != NULL) {
1819 return bl->memop_backward->mem;
1821 /* if there is NO memory in this block, go to the dominator */
1822 bl = get_block_entry(get_Block_idom(bl->block));
1824 } /* find_last_memory */
1827 * Reroute all memory users of old memory
1828 * to a new memory IR-node.
1830 * @param omem the old memory IR-node
1831 * @param nmem the new memory IR-node
1833 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1836 for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1838 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1840 set_irn_n(user, n_pos, nmem);
1843 /* all edges that previously pointed to omem now point to nmem */
1844 nmem->out = omem->out;
1845 } /* reroute_all_mem_users */
1848 * Reroute memory users of old memory that are dominated by a given block
1849 * to a new memory IR-node.
1851 * @param omem the old memory IR-node
1852 * @param nmem the new memory IR-node
1853 * @param pass_bl the block the memory must pass
1855 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1856 int i, j, n = get_irn_n_outs(omem);
1857 ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1859 for (i = j = 0; i < n; ++i) {
1861 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1862 ir_node *use_bl = get_nodes_block(user);
1866 use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1868 if (block_dominates(pass_bl, use_bl)) {
1869 /* found a user that is dominated */
1871 edges[j].pos = n_pos;
1872 edges[j].use = user;
1874 set_irn_n(user, n_pos, nmem);
1878 /* Modify the out structure: we create a new out edge array on our
1879 temporary obstack here. This should be no problem, as we invalidate the edges
1880 at the end anyway. */
1881 /* first entry is used for the length */
1884 } /* reroute_mem_through */
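/*
 * Only users whose block (for Phis: the corresponding predecessor block) is
 * dominated by pass_bl are moved to nmem; all other users keep omem. The new
 * out array lives on the pass' obstack, which is fine because all out edges
 * are invalidated when the pass finishes.
 */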
1887 * Insert Loads, making partly redundant Loads fully redundant.
1889 static int insert_Load(block_t *bl) {
1890 ir_node *block = bl->block;
1891 int i, n = get_Block_n_cfgpreds(block);
1892 unsigned end = env.rbs_size - 1;
1895 DB((dbg, LEVEL_3, "processing %+F\n", block));
1898 /* might still happen for an unreachable block (end for instance) */
1906 NEW_ARR_A(ir_node *, ins, n);
1908 rbitset_set_all(env.curr_set, env.rbs_size);
1910 /* More than one predecessor: calculate the join over all avail_out sets, ignoring unevaluated
1911 blocks. These would contribute Top anyway. */
1912 for (i = n - 1; i >= 0; --i) {
1913 ir_node *pred = skip_Proj(get_Block_cfgpred(block, i));
1914 ir_node *blk = get_nodes_block(pred);
1917 pred_bl = get_block_entry(blk);
1918 rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
1920 if (is_Load(pred) || is_Store(pred)) {
1921 /* We reached this block by an exception from a Load or Store:
1922 * the memop creating the exception was NOT completed then, so kill it
1924 memop_t *exc_op = get_irn_memop(pred);
1925 rbitset_clear(env.curr_set, exc_op->value.id);
1930 * Ensure that all values are in the map: build Phi's if necessary:
1931 * Note: the last bit is the sentinel and ALWAYS set, so start at env.rbs_size - 2.
1933 for (pos = env.rbs_size - 2; pos >= 0; --pos) {
1934 if (! rbitset_is_set(env.curr_set, pos))
1935 env.curr_id_2_memop[pos] = NULL;
1937 ir_node *pred = get_Block_cfgpred_block(bl->block, 0);
1938 block_t *pred_bl = get_block_entry(pred);
1940 memop_t *first = NULL;
1941 ir_mode *mode = NULL;
1943 for (i = 0; i < n; ++i) {
1946 pred = get_Block_cfgpred_block(bl->block, i);
1947 pred_bl = get_block_entry(pred);
1949 mop = pred_bl->id_2_memop_avail[pos];
1950 if (first == NULL) {
1952 ins[0] = first->value.value;
1953 mode = get_irn_mode(ins[0]);
1955 /* no Phi needed so far */
1956 env.curr_id_2_memop[pos] = first;
1958 ins[i] = conv_to(mop->value.value, mode);
1959 if (ins[i] != ins[0]) {
1960 if (ins[i] == NULL) {
1961 /* conversion failed */
1962 env.curr_id_2_memop[pos] = NULL;
1963 rbitset_clear(env.curr_set, pos);
1972 ir_node *phi = new_r_Phi(bl->block, n, ins, mode);
1973 memop_t *phiop = alloc_memop(phi);
1975 phiop->value = first->value;
1976 phiop->value.value = phi;
1978 /* no need to link it in, as it is a DATA phi */
1980 env.curr_id_2_memop[pos] = phiop;
1982 DB((dbg, LEVEL_3, "Created new %+F on merging value for address %+F\n", phi, first->value.address));
1987 /* only one predecessor, simply copy the map */
1988 ir_node *pred = get_Block_cfgpred_block(bl->block, 0);
1989 block_t *pred_bl = get_block_entry(pred);
1991 rbitset_copy(env.curr_set, pred_bl->avail_out, env.rbs_size);
1993 memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
1997 /* check for partly redundant values */
1998 for (pos = rbitset_next(bl->anticL_in, 0, 1);
2000 pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
2001 memop_t *op = bl->id_2_memop_antic[pos];
2002 int have_some, all_same;
2005 if (rbitset_is_set(env.curr_set, pos)) {
2010 assert(is_Load(op->node));
2012 DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));
2017 for (i = n - 1; i >= 0; --i) {
2018 ir_node *pred = get_Block_cfgpred_block(block, i);
2019 block_t *pred_bl = get_block_entry(pred);
2020 ir_mode *mode = op->value.mode;
2024 adr = phi_translate(op->value.address, block, i);
2025 DB((dbg, LEVEL_3, ".. using address %+F in pred %d\n", adr, i));
2026 e = find_address_avail(pred_bl, register_address(adr), mode);
2028 ir_node *ef_block = get_nodes_block(adr);
2029 if (! block_dominates(ef_block, pred)) {
2030 /* cannot place a copy here */
2032 DB((dbg, LEVEL_3, "%+F cannot be moved into predecessor %+F\n", op->node, pred));
2035 DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
2036 pred_bl->avail = NULL;
2039 if (e->value.mode != mode && !can_convert_to(e->value.mode, mode)) {
2040 /* cannot create a Phi due to different modes */
2046 DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
2049 else if (first != e->node)
2053 if (have_some && !all_same) {
2054 ir_mode *mode = op->value.mode;
2058 NEW_ARR_A(ir_node *, in, n);
2060 for (i = n - 1; i >= 0; --i) {
2061 ir_node *pred = get_Block_cfgpred_block(block, i);
2062 block_t *pred_bl = get_block_entry(pred);
2064 if (pred_bl->avail == NULL) {
2065 /* create a new Load here to make it fully redundant */
2066 dbg_info *db = get_irn_dbg_info(op->node);
2067 ir_node *last_mem = find_last_memory(pred_bl);
2068 ir_node *load, *def, *adr;
2071 assert(last_mem != NULL);
2072 adr = phi_translate(op->value.address, block, i);
2073 load = new_rd_Load(db, pred, last_mem, adr, mode, cons_none);
2074 def = new_r_Proj(pred, load, mode, pn_Load_res);
2075 DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));
2077 new_op = alloc_memop(load);
2078 new_op->mem = new_r_Proj(pred, load, mode_M, pn_Load_M);
2079 new_op->value.address = adr;
2080 new_op->value.id = op->value.id;
2081 new_op->value.mode = mode;
2082 new_op->value.value = def;
2084 new_op->projs[pn_Load_M] = new_op->mem;
2085 new_op->projs[pn_Load_res] = def;
2087 new_op->prev = pred_bl->memop_backward;
2088 if (pred_bl->memop_backward != NULL)
2089 pred_bl->memop_backward->next = new_op;
2091 pred_bl->memop_backward = new_op;
2093 if (pred_bl->memop_forward == NULL)
2094 pred_bl->memop_forward = new_op;
2096 if (get_nodes_block(last_mem) == pred) {
2097 /* We have added a new last memory op in the pred block.
2098 If pred already had a last mem, reroute all memory
2100 reroute_all_mem_users(last_mem, new_op->mem);
2102 /* reroute only those memory users going through the pred block */
2103 reroute_mem_through(last_mem, new_op->mem, pred);
2106 /* we added this load at the end, so it will be avail anyway */
2107 add_memop_avail(pred_bl, new_op);
2108 pred_bl->avail = new_op;
2110 in[i] = conv_to(pred_bl->avail->value.value, mode);
2112 phi = new_r_Phi(block, n, in, mode);
2113 DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));
2115 phi_op = clone_memop_phi(op, phi);
2121 /* recalculate avail by gen and kill */
2122 calc_gen_kill_avail(bl);
2124 /* always update the map after gen/kill, as values might have been changed due to RAR/WAR/WAW */
2125 memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2127 if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
2128 /* the avail set has changed */
2129 rbitset_copy(bl->avail_out, env.curr_set, env.rbs_size);
2130 dump_curr(bl, "Avail_out*");
2133 dump_curr(bl, "Avail_out");
2138 * Insert Loads upwards.
2140 static void insert_Loads_upwards(void) {
2144 /* recalculate avail_out and insert Loads */
2145 DB((dbg, LEVEL_2, "Inserting Loads\n"));
2149 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
2153 /* over all blocks in reverse post order, skip the start block */
2154 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
2155 need_iter |= insert_Load(bl);
2158 } while (need_iter);
2160 DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
2161 } /* insert_Loads_upwards */
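/*
 * The insertion itself is a fixed point: a Load created in a predecessor
 * becomes available there, which may make further Loads higher up partly
 * redundant as well, so insert_Load() is repeated until no avail_out set
 * changes any more.
 */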
2164 * Kill unreachable control flow.
2166 * @param irg the graph to operate on
2168 static void kill_unreachable_blocks(ir_graph *irg) {
2173 NEW_ARR_A(ir_node *, ins, env.max_cfg_preds);
2175 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2176 ir_node *block = bl->block;
2179 assert(get_Block_mark(block));
2181 n = get_Block_n_cfgpreds(block);
2183 for (i = j = 0; i < n; ++i) {
2184 ir_node *pred = get_Block_cfgpred(block, i);
2190 pred_bl = get_nodes_block(skip_Proj(pred));
2191 if (! get_Block_mark(pred_bl))
2197 ir_node *phi, *next;
2199 /* some unreachable blocks detected */
2202 DB((dbg, LEVEL_1, "Killing dead block predecessors on %+F\n", block));
2204 set_irn_in(block, j, ins);
2206 /* shorten all Phi nodes */
2207 for (phi = get_Block_phis(block); phi != NULL; phi = next) {
2208 next = get_Phi_next(phi);
2210 for (i = k = 0; i < n; ++i) {
2211 ir_node *pred = get_Block_cfgpred_block(block, i);
2216 if (! get_Block_mark(pred))
2219 ins[k++] = get_Phi_pred(phi, i);
2222 exchange(phi, ins[0]);
2224 set_irn_in(phi, k, ins);
2231 /* kick keep alives */
2232 ir_node *end = get_irg_end(irg);
2233 int i, j, n = get_End_n_keepalives(end);
2235 NEW_ARR_A(ir_node *, ins, n);
2237 for (i = j = 0; i < n; ++i) {
2238 ir_node *ka = get_End_keepalive(end, i);
2246 ka_bl = get_nodes_block(skip_Proj(ka));
2247 if (get_Block_mark(ka_bl))
2251 set_End_keepalives(end, j, ins);
2255 /* this transformation does NOT invalidate dominance */
2257 } /* kill_unreachable_blocks */
2259 int opt_ldst(ir_graph *irg) {
2261 ir_graph *rem = current_ir_graph;
2263 current_ir_graph = irg;
2265 FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
2266 // firm_dbg_set_mask(dbg, -1);
2268 DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
2270 /* we need landing pads */
2271 remove_critical_cf_edges(irg);
2273 // dump_ir_block_graph(irg, "-XXX");
2275 if (get_opt_alias_analysis()) {
2276 assure_irg_entity_usage_computed(irg);
2277 assure_irp_globals_entity_usage_computed();
2280 obstack_init(&env.obst);
2281 ir_nodemap_init(&env.adr_map);
2284 env.backward = NULL;
2285 env.curr_adr_id = 0;
2287 env.max_cfg_preds = 0;
2289 env.start_bl = get_irg_start_block(irg);
2290 env.end_bl = get_irg_end_block(irg);
2291 #ifdef DEBUG_libfirm
2292 env.id_2_address = NEW_ARR_F(ir_node *, 0);
2295 assure_irg_outs(irg);
2297 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2299 /* first step: allocate block entries. Note that some blocks might be
2300 unreachable here. Using the normal walk ensures that ALL blocks are initialized. */
2301 irg_walk_graph(irg, prepare_blocks, link_phis, NULL);
2303 /* produce an inverse post-order list for the CFG: this links only reachable
2305 irg_out_block_walk(get_irg_start_block(irg), NULL, inverse_post_order, NULL);
2307 if (! get_Block_mark(env.end_bl)) {
2309 * The end block is NOT reachable due to endless loops
2310 * or no_return calls.
2311 * Place the end block last.
2312 * env.backward points to the last block in the list for this purpose.
2314 env.backward->forward_next = get_block_entry(env.end_bl);
2316 set_Block_mark(env.end_bl, 1);
2319 /* KILL unreachable blocks: these disturb the data flow analysis */
2320 kill_unreachable_blocks(irg);
2324 /* second step: find and sort all memory ops */
2325 walk_memory_irg(irg, collect_memops, NULL, NULL);
2327 #ifdef DEBUG_libfirm
2328 /* check that the backward map is correct */
2329 assert((unsigned)ARR_LEN(env.id_2_address) == env.curr_adr_id);
2332 if (env.n_mem_ops == 0) {
2337 /* create the backward links. */
2338 env.backward = NULL;
2339 irg_block_walk_graph(irg, NULL, collect_backward, NULL);
2341 /* link the end block in */
2342 bl = get_block_entry(env.end_bl);
2343 bl->backward_next = env.backward;
2346 /* check that we really start with the start / end block */
2347 assert(env.forward->block == env.start_bl);
2348 assert(env.backward->block == env.end_bl);
2350 /* create address sets: for now, only the existing addresses are allowed plus one
2351 needed for the sentinel */
2352 env.rbs_size = env.curr_adr_id + 1;
2354 /* create the current set */
2355 env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2356 rbitset_set(env.curr_set, env.rbs_size - 1);
2357 env.curr_id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2358 memset(env.curr_id_2_memop, 0, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
2360 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2361 /* set sentinel bits */
2362 bl->avail_out = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2363 rbitset_set(bl->avail_out, env.rbs_size - 1);
2365 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2366 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
2368 bl->anticL_in = rbitset_obstack_alloc(&env.obst, env.rbs_size);
2369 rbitset_set(bl->anticL_in, env.rbs_size - 1);
2371 bl->id_2_memop_antic = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
2372 memset(bl->id_2_memop_antic, 0, env.rbs_size * sizeof(bl->id_2_memop_antic[0]));
2375 // dump_block_list(&env);
2376 (void) dump_block_list;
2381 insert_Loads_upwards();
2384 /* over all blocks in reverse post order */
2385 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
2386 do_replacements(bl);
2389 /* not only invalidate but free them. We might have allocated new out arrays
2390 on our obstack which will be freed below anyway. */
2392 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
2396 ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_BLOCK_MARK);
2397 ir_nodemap_destroy(&env.adr_map);
2398 obstack_free(&env.obst, NULL);
2400 // dump_ir_block_graph(irg, "-YYY");
2402 #ifdef DEBUG_libfirm
2403 DEL_ARR_F(env.id_2_address);
2406 current_ir_graph = rem;
2407 return env.changed != 0;
2410 ir_graph_pass_t *opt_ldst_pass(const char *name, int verify, int dump)
2412 return def_graph_pass(name ? name : "ldst_df", verify, dump, opt_ldst);
2413 } /* opt_ldst_pass */