2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Dataflow driven Load/Store optimizations, uses some ideas from
24 * @author Michael Beck
39 #include "irnodemap.h"
40 #include "raw_bitset.h"
45 * Mapping an address to a dense ID.
47 typedef struct address_entry_t {
48 unsigned id; /**< The ID */
55 FLAG_KILL_LOADS = 1, /**< KILL all Loads */
56 FLAG_KILL_STORES = 2, /**< KILL all Stores */
57 FLAG_KILLED_NODE = 4, /**< this node was killed */
58 FLAG_EXCEPTION = 8, /**< this node has exception flow */
59 FLAG_IGNORE = 16, /**< ignore this node (volatile or other) */
60 /** this memop KILLS all addresses */
61 FLAG_KILL_ALL = FLAG_KILL_LOADS|FLAG_KILL_STORES
65 * A value: This represents a value stored at a given address in
66 * memory. Do not confuse with values from value numbering.
68 typedef struct value_t value_t;
70 ir_node *address; /**< the address of this value */
71 ir_node *value; /**< the value itself */
72 ir_mode *mode; /**< the mode of the value */
73 unsigned id; /**< address id */
77 * A memop describes a memory-related operation.
78 * These are Loads/Stores and all other ops that might modify
79 * memory (Calls, CopyB) or cause exceptions.
81 typedef struct memop_t memop_t;
83 value_t value; /**< the value of this memop: only defined for Load/Store */
84 ir_node *node; /**< the memory op itself */
85 ir_node *mem; /**< the memory FROM this node */
86 ir_node *replace; /**< the replacement node if this memop is replaced */
87 memop_t *next; /**< links to the next memory op in the block in forward order. */
88 memop_t *prev; /**< links to the previous memory op in the block in forward order. */
89 unsigned flags; /**< memop flags */
93 * Additional data for every basic block.
95 typedef struct block_t block_t;
97 memop_t *memop_forward; /**< topologically sorted list of memory ops in this block */
98 memop_t *memop_backward; /**< last memop in the list */
99 unsigned *avail_out; /**< out-set of available addresses */
100 memop_t **id_2_memop_avail; /**< maps avail address ids to memops */
101 unsigned *anticL_in; /**< in-set of anticipated Load addresses */
102 memop_t **id_2_memop; /**< maps address ids to memops */
103 ir_node *block; /**< the associated block */
104 block_t *forward_next; /**< next block entry for forward iteration */
105 block_t *backward_next; /**< next block entry for backward iteration */
106 memop_t *avail; /**< used locally for the avail map */
110 * Metadata for this pass.
112 typedef struct ldst_env_t {
113 struct obstack obst; /**< obstack for temporary data */
114 ir_nodemap_t adr_map; /**< Maps addresses to address_entry records */
115 block_t *forward; /**< Inverse post-order list of all blocks Start->End */
116 block_t *backward; /**< Inverse post-order list of all blocks End->Start */
117 ir_node *start_bl; /**< start block of the current graph */
118 ir_node *end_bl; /**< end block of the current graph */
119 unsigned *curr_set; /**< current set of addresses */
120 unsigned curr_adr_id; /**< number for address mapping */
121 unsigned n_mem_ops; /**< number of memory operations (Loads/Stores) */
122 unsigned rbs_size; /**< size of all bitsets in bits */
123 int changed; /**< Flags for changed graph state */
128 static firm_dbg_module_t *dbg;
130 /* the one and only environment */
134 * Dumps the block list.
136 * @param env the load/store environment
138 static void dump_block_list(ldst_env *env) {
143 for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
144 DB((dbg, LEVEL_2, "%+F {", entry->block));
147 for (op = entry->memop_forward; op != NULL; op = op->next) {
149 DB((dbg, LEVEL_2, "\n\t"));
150 } DB((dbg, LEVEL_2, "%+F", op->node));
151 if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
152 DB((dbg, LEVEL_2, "X"));
153 else if (op->flags & FLAG_KILL_LOADS)
154 DB((dbg, LEVEL_2, "L"));
155 else if (op->flags & FLAG_KILL_STORES)
156 DB((dbg, LEVEL_2, "S"));
157 DB((dbg, LEVEL_2, ", "));
161 DB((dbg, LEVEL_2, "\n}\n\n"));
166 * Dumps the current set.
168 * @param bl current block
169 * @param s name of the set
171 static void dump_curr(block_t *bl, const char *s) {
173 unsigned end = env.n_mem_ops * 2 - 1;
176 DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
178 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
179 memop_t *op = bl->id_2_memop[pos];
182 DB((dbg, LEVEL_2, "\n\t"));
184 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
187 DB((dbg, LEVEL_2, "\n}\n"));
191 #define dump_block_list()
192 #define dump_curr(bl, s)
193 #endif /* DEBUG_libfirm */
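/*
 * Note: block_t and memop_t entries are attached to their IR nodes via the
 * node link field (set_irn_link), so get_block_entry()/get_irn_memop() below
 * are O(1) lookups. This requires the IR_RESOURCE_IRN_LINK resource, which
 * opt_ldst() reserves before the walkers run.
 */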
195 /** Get the block entry for a block node */
196 static block_t *get_block_entry(const ir_node *block) {
197 assert(is_Block(block));
199 return get_irn_link(block);
202 /** Get the memop entry for a memory operation node */
203 static memop_t *get_irn_memop(const ir_node *irn) {
204 assert(! is_Block(irn));
205 return get_irn_link(irn);
209 * Walk over the memory edges from definition to users.
211 * @param irn start node
212 * @param pre pre walker function
213 * @param post post walker function
214 * @param ctx context parameter for the walker functions
216 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
220 mark_irn_visited(irn);
225 mode = get_irn_mode(irn);
226 if (mode == mode_M) {
227 /* every successor uses memory */
228 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
229 ir_node *succ = get_irn_out(irn, i);
231 if (! irn_visited(succ))
232 walk_memory(succ, pre, post, ctx);
234 } else if (mode == mode_T) {
235 /* only some Projs use memory */
236 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
237 ir_node *proj = get_irn_out(irn, i);
239 if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
240 walk_memory(proj, pre, post, ctx);
248 * Walks over all memory nodes of a graph.
251 * @param pre pre walker function
252 * @param post post walker function
253 * @param ctx context parameter for the walker functions
255 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
256 inc_irg_visited(irg);
258 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
261 * there are two possible sources for memory: initial_mem and nomem
262 * we ignore nomem as this should NOT change the memory
264 walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
266 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
270 * Block walker: allocate a block entry for every block.
272 static void prepare_blocks(ir_node *block, void *ctx) {
273 block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
277 entry->memop_forward = NULL;
278 entry->memop_backward = NULL;
279 entry->avail_out = NULL;
280 entry->id_2_memop_avail = NULL;
281 entry->anticL_in = NULL;
282 entry->id_2_memop = NULL;
283 entry->block = block;
284 entry->forward_next = env.forward;
285 entry->backward_next = NULL;
287 set_irn_link(block, entry);
289 /* create the list in inverse order */
292 /* temporarily remember the last one */
292 env.backward = entry;
296 * Block walker: create backward links for the memops of a block.
298 static void collect_backward(ir_node *block, void *ctx) {
299 block_t *entry = get_block_entry(block);
305 * Do NOT link in the end block yet. We want it to be
306 * the first in the list. This is NOT guaranteed by the walker
307 * if we have endless loops.
309 if (block != env.end_bl) {
310 entry->backward_next = env.backward;
312 /* create the list in inverse order */
313 env.backward = entry;
316 /* create backward links for all memory ops */
318 for (op = entry->memop_forward; op != NULL; op = op->next) {
322 entry->memop_backward = last;
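/*
 * After collect_backward() the memops of every block can be traversed in both
 * directions: memop_forward/->next walks them in topological order, while
 * memop_backward/->prev walks them in reverse.
 */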
328 * @param irn the IR-node representing the memop
330 static memop_t *alloc_memop(ir_node *irn) {
331 memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
333 m->value.address = NULL;
334 m->value.value = NULL;
335 m->value.mode = NULL;
342 set_irn_link(irn, m);
347 * Register an address and allocate an ID for it.
349 * @param adr the IR-node representing the address
351 static unsigned register_address(ir_node *adr) {
352 address_entry *entry;
354 /* skip Confirms and Casts */
356 if (is_Confirm(adr)) {
357 adr = get_Confirm_value(adr);
361 adr = get_Cast_op(adr);
365 entry = ir_nodemap_get(&env.adr_map, adr);
369 entry = obstack_alloc(&env.obst, sizeof(*entry));
371 entry->id = env.curr_adr_id++;
372 ir_nodemap_insert(&env.adr_map, adr, entry);
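/*
 * The dense IDs handed out here serve as bit indices into curr_set, avail_out
 * and anticL_in and as indices into the id_2_memop maps; opt_ldst() sizes all
 * of these to rbs_size = 2 * n_mem_ops bits/entries.
 */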
378 * Return the memory properties of a call node.
380 * @param call the call node
382 * @return a bitset of mtp_property_const and mtp_property_pure
384 static unsigned get_Call_memory_properties(ir_node *call) {
385 ir_type *call_tp = get_Call_type(call);
386 unsigned prop = get_method_additional_properties(call_tp);
388 /* check the call type first */
389 if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
390 /* try the called entity */
391 ir_node *ptr = get_Call_ptr(call);
393 if (is_Global(ptr)) {
394 ir_entity *ent = get_Global_entity(ptr);
396 prop = get_entity_additional_properties(ent);
399 return prop & (mtp_property_const|mtp_property_pure);
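/*
 * The result is used by update_Call_memop() below: const calls do not touch
 * memory at all, pure calls only read it, and everything else is treated as
 * potentially reading and writing memory.
 */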
403 * Update a memop for a Load.
407 static void update_Load_memop(memop_t *m) {
409 ir_node *load = m->node;
410 ir_node *adr = get_Load_ptr(load);
412 if (get_Load_volatility(load) == volatility_is_volatile)
413 m->flags |= FLAG_IGNORE;
415 m->value.address = adr;
417 for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
418 ir_node *proj = get_irn_out(load, i);
420 /* beware of keep edges */
424 switch (get_Proj_proj(proj)) {
426 m->value.value = proj;
427 m->value.mode = get_irn_mode(proj);
429 case pn_Load_X_except:
430 m->flags |= FLAG_EXCEPTION;
438 if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
439 /* only create an address if this node is NOT killed immediately or ignored */
440 m->value.id = register_address(adr);
443 /* no user, KILL it */
444 m->flags |= FLAG_KILLED_NODE;
449 * Update a memop for a Store.
453 static void update_Store_memop(memop_t *m) {
455 ir_node *store = m->node;
456 ir_node *adr = get_Store_ptr(store);
458 if (get_Store_volatility(store) == volatility_is_volatile) {
459 m->flags |= FLAG_IGNORE;
461 /* only create an address if this node is NOT ignored */
462 m->value.id = register_address(adr);
466 m->value.address = adr;
468 for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
469 ir_node *proj = get_irn_out(store, i);
471 /* beware of keep edges */
475 switch (get_Proj_proj(proj)) {
476 case pn_Store_X_except:
477 m->flags |= FLAG_EXCEPTION;
484 m->value.value = get_Store_value(store);
485 m->value.mode = get_irn_mode(m->value.value);
489 * Update a memop for a Call.
493 static void update_Call_memop(memop_t *m) {
494 ir_node *call = m->node;
495 unsigned prop = get_Call_memory_properties(call);
498 if (prop & mtp_property_const) {
499 /* A constant call does NOT use memory at all, we
500 can kick it from the list. */
501 } else if (prop & mtp_property_pure) {
502 /* pure calls READ memory */
503 m->flags = FLAG_KILL_STORES;
505 m->flags = FLAG_KILL_ALL;
507 for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
508 ir_node *proj = get_irn_out(call, i);
510 /* beware of keep edges */
514 switch (get_Proj_proj(proj)) {
515 case pn_Call_X_except:
516 m->flags |= FLAG_EXCEPTION;
518 case pn_Call_M_regular:
526 * Update a memop for a Div/Mod/Quot/DivMod.
530 static void update_DivOp_memop(memop_t *m) {
531 ir_node *div = m->node;
534 for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
535 ir_node *proj = get_irn_out(div, i);
537 /* beware of keep edges */
541 switch (get_Proj_proj(proj)) {
542 case pn_Generic_X_except:
543 m->flags |= FLAG_EXCEPTION;
545 case pn_Generic_M_regular:
553 * Update a memop for a Phi.
557 static void update_Phi_memop(memop_t *m) {
558 /* the Phi is its own mem */
563 * Memory walker: collect all memory ops and build topological lists.
565 static void collect_memops(ir_node *irn, void *ctx) {
572 /* we can safely ignore ProjM's except the initial memory */
573 if (irn != get_irg_initial_mem(current_ir_graph))
577 op = alloc_memop(irn);
578 block = get_nodes_block(irn);
579 entry = get_block_entry(block);
582 update_Phi_memop(op);
583 /* Phis must always be placed first */
584 op->next = entry->memop_forward;
585 entry->memop_forward = op;
586 if (entry->memop_backward == NULL)
587 entry->memop_backward = op;
589 switch (get_irn_opcode(irn)) {
591 update_Load_memop(op);
594 update_Store_memop(op);
597 update_Call_memop(op);
609 /* we can use those to find the memory edge */
615 update_DivOp_memop(op);
619 /* TODO: handle some builtins */
621 /* unsupported operation */
622 op->flags = FLAG_KILL_ALL;
626 /* all others should be placed last */
627 if (entry->memop_backward == NULL) {
628 entry->memop_forward = entry->memop_backward = op;
630 entry->memop_backward->next = op;
631 entry->memop_backward = op;
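/*
 * The per-block lists built here are in topological order along the memory
 * edges: Phis are placed first (see above), all other memops are appended in
 * the order in which walk_memory() visits them.
 */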
637 * Find an address in the current set.
639 * @param bl the block
640 * @param value the value to be searched for
642 static memop_t *find_address(const block_t *bl, const value_t *value) {
643 if (rbitset_is_set(env.curr_set, value->id)) {
644 memop_t *res = bl->id_2_memop[value->id];
646 if (res->value.mode == value->mode)
648 /* allow hidden casts */
649 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
650 get_mode_arithmetic(value->mode) == irma_twos_complement &&
651 get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
658 * Find an address in the avail_out set.
660 * @param bl the block
661 * @param value the value to be searched for
663 static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
664 if (rbitset_is_set(bl->avail_out, value->id)) {
665 memop_t *res = bl->id_2_memop_avail[value->id];
667 if (res->value.mode == value->mode)
669 /* allow hidden casts */
670 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
671 get_mode_arithmetic(value->mode) == irma_twos_complement &&
672 get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
679 * Kill all Loads from the current set.
681 * @param bl the current block
683 static void kill_all_loads(block_t *bl) {
685 unsigned end = env.n_mem_ops * 2 - 1;
687 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
688 memop_t *op = bl->id_2_memop[pos];
690 if (! is_Store(op->node))
691 rbitset_clear(env.curr_set, pos);
696 * Kill all Stores from the current set.
698 * @param bl the current block
700 static void kill_all_stores(block_t *bl) {
702 unsigned end = env.n_mem_ops * 2 - 1;
704 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
705 memop_t *op = bl->id_2_memop[pos];
707 if (is_Store(op->node))
708 rbitset_clear(env.curr_set, pos);
713 * Kill all addresses from the current set.
715 static void kill_all(void) {
716 rbitset_clear_all(env.curr_set, env.rbs_size);
719 rbitset_set(env.curr_set, env.rbs_size - 1);
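/*
 * The highest bit (rbs_size - 1) acts as a sentinel: it is always set, so the
 * rbitset_next() loops terminate at pos == 2 * n_mem_ops - 1 without an extra
 * bounds check.
 */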
724 * Kill all Stores from the current set that may alias with the given Load value.
726 * @param bl the block
727 * @param value the Load value
729 static void kill_stores(block_t *bl, const value_t *value) {
731 unsigned end = env.n_mem_ops * 2 - 1;
733 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
734 memop_t *op = bl->id_2_memop[pos];
736 if (is_Store(op->node)) {
737 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
738 op->value.address, op->value.mode)) {
739 rbitset_clear(env.curr_set, pos);
740 bl->id_2_memop[pos] = NULL;
747 * Kill all memops from the current set that may alias with the given Store value.
749 * @param bl the block
750 * @param value the Store value
752 static void kill_memops(block_t *bl, const value_t *value) {
754 unsigned end = env.n_mem_ops * 2 - 1;
756 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
757 memop_t *op = bl->id_2_memop[pos];
759 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
760 op->value.address, op->value.mode)) {
761 rbitset_clear(env.curr_set, pos);
762 bl->id_2_memop[pos] = NULL;
768 * Add the value of a memop to the current set.
770 * @param bl the block
771 * @param op the memory op
773 static void add_memop(block_t *bl, memop_t *op) {
774 rbitset_set(env.curr_set, op->value.id);
775 bl->id_2_memop[op->value.id] = op;
779 * Add the value of a memop to the avail_out set.
781 * @param bl the block
782 * @param op the memory op
784 static void add_memop_avail(block_t *bl, memop_t *op) {
785 rbitset_set(bl->avail_out, op->value.id);
786 bl->id_2_memop_avail[op->value.id] = op;
790 * Add a Conv if needed.
792 static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
793 if (get_irn_mode(irn) != mode) {
794 ir_node *block = get_nodes_block(irn);
795 return new_r_Conv(current_ir_graph, block, irn, mode);
801 * build a phi definition for a value in the given block
803 * @param block the block
804 * @param value the value
806 * @return the definition
808 static ir_node *build_phi_definition(ir_node *block, const value_t *value) {
812 ir_mode *mode = value->mode;
814 /* no: we need a new Phi */
815 n = get_Block_n_cfgpreds(block);
818 block = get_Block_cfgpred_block(block, 0);
819 n = get_Block_n_cfgpreds(block);
820 mop = find_address_avail(get_block_entry(block), value);
823 /* this can only happen if modes cannot be converted */
829 NEW_ARR_A(ir_node *, in, n);
830 for (i = n - 1; i >= 0; --i) {
831 ir_node *pred_bl = get_Block_cfgpred_block(block, i);
834 mop = find_address_avail(get_block_entry(pred_bl), value);
838 def_bl = get_nodes_block(mop->value.value);
840 if (block_dominates(def_bl, pred_bl))
841 in[i] = conv_to(mop->value.value, mode);
843 ir_node *def = build_phi_definition(pred_bl, &mop->value);
846 in[i] = conv_to(def, mode);
850 phi = new_r_Phi(current_ir_graph, block, n, in, mode);
851 DB((dbg, LEVEL_2, "Created new Phi %+F for value %+F in block %+F\n", phi, value->value, block));
856 * find a definition for a value in the given block or above.
858 * @param block the block
859 * @param op the memop: the definition must dominate it
860 * @param value the value
862 * @return the definition or NULL if modes are different
864 static ir_node *find_definition(ir_node *block, const memop_t *op, const value_t *value) {
865 ir_node *def_block = get_nodes_block(value->value);
867 if (def_block == block) {
868 /* the value is in our block: check, if it is above in the memory list */
871 for (p = op->prev; p != NULL; p = p->prev) {
872 if (!(p->flags & FLAG_KILLED_NODE) &&
873 p->value.address == value->address) {
875 assert(p->value.value == value->value);
876 return p->value.value;
879 } else if (block_dominates(def_block, block)) {
880 /* strictly dominates */
884 return build_phi_definition(block, value);
888 * Mark a Load memop to be replaced by a definition
890 * @param op the Load memop
892 static void mark_replace_load(memop_t *op, ir_node *def) {
894 op->flags |= FLAG_KILLED_NODE;
899 * Mark a Store memop to be removed.
901 * @param op the Store memop
903 static void mark_remove_store(memop_t *op) {
904 op->flags |= FLAG_KILLED_NODE;
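/* Number of bytes needed to store x bits (rounded up). */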
908 #define BYTE_SIZE(x) (((x) + 7) >> 3)
911 * Do forward dataflow analysis on a given block to calculate the avail_out set.
913 * @param bl the block
915 * @return non-zero if the set has changed since last iteration
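 *
 * Sketch of the computed equation (in the notation of the sets above):
 *
 *   Avail_in(B)  = Intersection over all predecessors P of Avail_out(P)
 *   Avail_out(B) = transfer_B(Avail_in(B))
 *
 * where transfer_B adds the values produced by the Loads/Stores of B and
 * removes everything that may alias a Store or a memory-clobbering op in B.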
917 static int forward_avail(block_t *bl) {
921 ir_node *pred = get_Block_cfgpred_block(bl->block, 0);
922 block_t *pred_bl = get_block_entry(pred);
924 rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
926 n = get_Block_n_cfgpreds(bl->block);
930 /* more than one predecessor, calculate the join */
931 for (i = n - 1; i > 0; --i) {
932 ir_node *pred = skip_Proj(get_Block_cfgpred(bl->block, i));
933 block_t *pred_bl = get_block_entry(get_nodes_block(pred));
935 rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
937 if (is_Load(pred) || is_Store(pred)) {
938 /* We reached this block by an exception from a Load or Store:
939 * the memop was NOT completed then, kill it
941 memop_t *exc_op = get_irn_memop(pred);
942 rbitset_clear(env.curr_set, exc_op->value.id);
946 /* make sure that all values are in the map */
947 for (pos = env.rbs_size - 1; pos >= 0; --pos) {
948 if (! rbitset_is_set(env.curr_set, pos))
949 bl->id_2_memop[pos] = NULL;
951 for (i = n - 1; i >= 0; --i) {
952 ir_node *pred = get_Block_cfgpred_block(bl->block, i);
953 block_t *pred_bl = get_block_entry(pred);
955 if (pred_bl->id_2_memop[pos] != NULL) {
956 bl->id_2_memop[pos] = pred_bl->id_2_memop[pos];
963 /* only one predecessor, simply copy the map */
964 memcpy(bl->id_2_memop, pred_bl->id_2_memop, env.rbs_size * sizeof(bl->id_2_memop[0]));
967 dump_curr(bl, "Avail_in");
969 for (op = bl->memop_forward; op != NULL; op = op->next) {
970 switch (get_irn_opcode(op->node)) {
978 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
979 /* do we have this already? */
980 memop_t *other = find_address(bl, &op->value);
982 def = find_definition(bl->block, op, &other->value);
985 if (is_Store(other->node)) {
987 DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
990 DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
993 mark_replace_load(op, def);
1000 kill_stores(bl, &op->value);
1006 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1007 /* do we have this store already */
1008 memop_t *other = find_address(bl, &op->value);
1009 if (other != NULL) {
1010 if (is_Store(other->node) &&
1011 get_nodes_block(other->node) == get_nodes_block(op->node)) {
1013 * A WAW in the same block: we can kick the first Store.
1014 * This is a shortcut: we know that the second Store will be anticipated
1017 DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", op->node, other->node));
1018 mark_remove_store(other);
1019 /* FIXME: a Load might get freed due to this killed Store */
1020 } else if (other->value.value == op->value.value) {
1022 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
1023 mark_remove_store(op);
1025 /* we overwrite the value that was loaded */
1029 /* add this value */
1030 kill_memops(bl, &op->value);
1036 switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
1037 case FLAG_KILL_LOADS|FLAG_KILL_STORES:
1040 case FLAG_KILL_LOADS:
1043 case FLAG_KILL_STORES:
1044 kill_all_stores(bl);
1053 * Always copy the map as it might get updated.
1054 * However, an update is NOT recorded as a change, as the set of available addresses
1057 memcpy(bl->id_2_memop_avail, bl->id_2_memop, env.rbs_size * sizeof(bl->id_2_memop[0]));
1058 if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
1060 rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
1061 dump_curr(bl, "Avail_out*");
1064 dump_curr(bl, "Avail_out");
1069 * Do backward dataflow analysis on a given block to calculate the antic set
1070 * of Loaded addresses.
1072 * @param bl the block
1074 * @return non-zero if the set has changed since last iteration
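 *
 * Sketch of the computed equation:
 *
 *   AnticL_out(B) = Intersection over all successors S of AnticL_in(S)
 *   AnticL_in(B)  = transfer_B(AnticL_out(B))
 *
 * where transfer_B adds the addresses loaded in B and removes addresses that
 * may alias a Store or a memory-clobbering op in B.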
1076 static int backward_antic(block_t *bl) {
1078 int n = get_Block_n_cfg_outs(bl->block);
1081 ir_node *succ = get_Block_cfg_out(bl->block, 0);
1082 block_t *succ_bl = get_block_entry(succ);
1085 rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1086 memcpy(bl->id_2_memop, succ_bl->id_2_memop, env.rbs_size * sizeof(bl->id_2_memop[0]));
1088 for (i = n - 1; i > 0; --i) {
1089 ir_node *succ = get_Block_cfg_out(bl->block, i);
1090 block_t *succ_bl = get_block_entry(succ);
1092 rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1095 /* block ends with a noreturn call */
1100 /* cleanup: kill those Loads whose address is not available */
1101 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1102 memop_t *op = succ_bl->id_2_memop[pos];
1103 ir_node *ptr = get_Load_ptr(op->node);
1104 ir_node *ptr_bl = get_nodes_block(ptr);
1106 if (!block_dominates(ptr_bl, bl->block))
1107 rbitset_clear(env.curr_set, pos);
1111 dump_curr(bl, "AnticL_out");
1113 for (op = bl->memop_backward; op != NULL; op = op->prev) {
1114 switch (get_irn_opcode(op->node)) {
1122 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1128 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1129 /* a Store: check which memops must be killed */
1130 kill_memops(bl, &op->value);
1134 switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
1135 case FLAG_KILL_LOADS|FLAG_KILL_STORES:
1138 case FLAG_KILL_LOADS:
1141 case FLAG_KILL_STORES:
1142 kill_all_stores(bl);
1149 dump_curr(bl, "AnticL_in");
1150 if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1152 rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
1159 * Replace a Load memop by an already known value.
1161 * @param op the Load memop
1163 static void replace_load(memop_t *op) {
1164 ir_node *load = op->node;
1165 ir_node *def = skip_Id(op->replace);
1170 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1172 if (op->flags & FLAG_EXCEPTION) {
1173 /* bad: this node is unused and executed for exception only */
1174 DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1177 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1179 for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
1180 ir_node *proj = get_irn_out(load, i);
1182 switch (get_Proj_proj(proj)) {
1184 exchange(proj, get_Load_mem(load));
1187 mode = get_irn_mode(proj);
1188 if (get_irn_mode(def) != mode) {
1190 dbg_info *db = get_irn_dbg_info(load);
1191 ir_node *block = get_nodes_block(proj);
1192 def = new_rd_Conv(db, current_ir_graph, block, def, mode);
1194 exchange(proj, def);
1196 case pn_Load_X_except:
1197 exchange(proj, new_Bad());
1199 case pn_Load_X_regular:
1200 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1203 panic("Unknown Proj from Load");
1209 * Remove a Store memop.
1211 * @param op the Store memop
1213 static void remove_store(memop_t *op) {
1214 ir_node *store = op->node;
1217 DB((dbg, LEVEL_1, "Removing %+F\n", store));
1218 for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1219 ir_node *proj = get_irn_out(store, i);
1221 switch (get_Proj_proj(proj)) {
1223 exchange(proj, get_Store_mem(store));
1225 case pn_Store_X_except:
1226 exchange(proj, new_Bad());
1228 case pn_Store_X_regular:
1229 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
1232 panic("Unknown Proj from Store");
1239 * Do all necessary replacements for a given block.
1241 * @param bl the block
1243 static void do_replacements(block_t *bl) {
1246 for (op = bl->memop_forward; op != NULL; op = op->next) {
1247 if (op->flags & FLAG_KILLED_NODE) {
1248 switch (get_irn_opcode(op->node)) {
1261 * Calculate the Avail_out sets for all basic blocks.
1263 static void calcAvail(void) {
1267 /* calculate avail_out */
1268 DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1271 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1275 /* over all blocks in reverse post order, skip the start block */
1276 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1277 need_iter |= forward_avail(bl);
1280 } while (need_iter);
1282 DB((dbg, LEVEL_2, "Get avail set after %d iterations\n\n", i));
1286 * Calculate the Antic_in sets for all basic blocks.
1288 static void calcAntic(void) {
1291 /* calculate antic_out */
1292 DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1297 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1301 /* over all blocks in reverse post order */
1302 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1303 need_iter |= backward_antic(bl);
1306 } while (need_iter);
1307 DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1311 * Return the node representing the first memory in a block.
1313 * @param bl the block
1315 static ir_node *find_first_memory(block_t *bl) {
1317 if (bl->memop_forward != NULL) {
1318 return bl->memop_forward->node;
1320 /* if there is NO memory in this block, go to the post dominator */
1321 bl = get_block_entry(get_Block_ipostdom(bl->block));
1326 * Return the node representing the last memory in a block.
1328 * @param bl the block
1330 static ir_node *find_last_memory(block_t *bl) {
1332 if (bl->memop_backward != NULL) {
1333 return bl->memop_backward->mem;
1335 /* if there is NO memory in this block, go to the dominator */
1336 bl = get_block_entry(get_Block_idom(bl->block));
1341 * Reroute all memory users of old memory
1342 * to a new memory IR-node.
1344 * @param omem the old memory IR-node
1345 * @param nmem the new memory IR-node
1347 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1350 for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1352 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1354 set_irn_n(user, n_pos, nmem);
1357 /* all edges previously point to omem now point to nmem */
1358 nmem->out = omem->out;
1362 * Reroute memory users of old memory that are dominated by a given block
1363 * to a new memory IR-node.
1365 * @param omem the old memory IR-node
1366 * @param nmem the new memory IR-node
1367 * @param pass_bl the block the memory must pass
1369 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1370 int i, j, n = get_irn_n_outs(omem);
1371 ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1373 for (i = j = 0; i < n; ++i) {
1375 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1376 ir_node *use_bl = get_nodes_block(user);
1380 use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1382 if (block_dominates(pass_bl, use_bl)) {
1383 /* found a user that is dominated */
1385 edges[j].pos = n_pos;
1386 edges[j].use = user;
1388 set_irn_n(user, n_pos, nmem);
1392 /* Modify the out structure: we create a new out edge array on our
1393 temporary obstack here. This should be no problem, as we invalidate the edges
1394 at the end anyway. */
1395 /* first entry is used for the length */
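/**
 * Walker: make a partially redundant Load fully redundant by inserting a copy
 * of the Load into every predecessor block where its value is not yet
 * available, then replace the original Load by a Phi over the predecessor
 * values.
 */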
1404 static void insert_Load(ir_node *block, void *ctx) {
1405 int *new_stuff = ctx;
1407 int i, n = get_Block_n_cfgpreds(block);
1409 unsigned end = env.n_mem_ops * 2 - 1;
1414 bl = get_block_entry(block);
1416 for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
1417 memop_t *op = bl->id_2_memop[pos];
1418 int have_some, all_same;
1421 assert(is_Load(op->node));
1423 if (op->flags & FLAG_KILLED_NODE)
1429 for (i = n - 1; i >= 0; --i) {
1430 ir_node *pred = get_Block_cfgpred_block(block, i);
1431 block_t *pred_bl = get_block_entry(pred);
1432 memop_t *e = find_address_avail(pred_bl, &op->value);
1435 ir_node *block = get_nodes_block(op->value.address);
1436 if (! block_dominates(block, pred)) {
1437 /* cannot place a copy here */
1441 pred_bl->avail = NULL;
1448 else if (first != e->node)
1452 if (have_some && !all_same) {
1453 ir_mode *mode = op->value.mode;
1456 NEW_ARR_A(ir_node *, in, n);
1458 for (i = n - 1; i >= 0; --i) {
1459 ir_node *pred = get_Block_cfgpred_block(block, i);
1460 block_t *pred_bl = get_block_entry(pred);
1462 if (pred_bl->avail == NULL) {
1463 /* create a new Load here to make the Load fully redundant */
1464 dbg_info *db = get_irn_dbg_info(op->node);
1465 ir_node *last_mem = find_last_memory(pred_bl);
1466 ir_node *load, *def;
1469 assert(last_mem != NULL);
1470 load = new_rd_Load(db, current_ir_graph, pred, last_mem, op->value.address, mode, cons_none);
1471 def = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
1472 DB((dbg, LEVEL_1, "Created new %+F for partly redundant %+F\n", load, op->node));
1474 new_op = alloc_memop(load);
1475 new_op->mem = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
1476 new_op->value.address = op->value.address;
1477 new_op->value.id = op->value.id;
1478 new_op->value.mode = mode;
1479 new_op->value.value = def;
1481 new_op->prev = pred_bl->memop_backward;
1482 pred_bl->memop_backward = new_op;
1484 if (get_nodes_block(last_mem) == pred) {
1485 /* We have added a new last memory op in the pred block.
1486 If pred already had a last mem, reroute all memory
1488 reroute_all_mem_users(last_mem, new_op->mem);
1491 /* reroute only the memory going through the pred block */
1491 reroute_mem_through(last_mem, new_op->mem, pred);
1494 /* we added this load at the end, so it will be avail anyway */
1495 add_memop_avail(pred_bl, new_op);
1496 pred_bl->avail = new_op;
1498 in[i] = pred_bl->avail->value.value;
1500 phi = new_r_Phi(current_ir_graph, block, n, in, mode);
1501 mark_replace_load(op, phi);
1504 if (get_nodes_block(op->node) == block) {
1505 /* The redundant node was in the current block:
1506 In that case, DO NOT update avail_out. If it was NOT
1507 avail although it is executed in this block, it is killed by a later
1511 /* The redundant node is NOT in the current block and anticipated.
1512 This can only happen IFF nothing kills the Load in the current block,
1513 so it will be avail in the next iteration.
1515 add_memop_avail(bl, op);
1517 /* TODO propagate it downwards */
1524 * Insert Loads upwards.
1526 static void insert_Loads_upwards(void) {
1529 /* calculate antic_out */
1530 DB((dbg, LEVEL_2, "Inserting Loads\n"));
1533 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1536 dom_tree_walk_irg(current_ir_graph, insert_Load, NULL, &need_iter);
1538 } while (need_iter);
1539 DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
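/*
 * Main driver of the pass: build the block lists, collect and classify all
 * memory ops, compute the Avail_out sets (forward dataflow), compute the
 * AnticL_in sets (backward dataflow), insert partially redundant Loads and
 * finally carry out all marked replacements.
 */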
1542 int opt_ldst(ir_graph *irg) {
1544 ir_graph *rem = current_ir_graph;
1546 current_ir_graph = irg;
1548 FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
1550 DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
1552 /* we need landing pads */
1553 remove_critical_cf_edges(irg);
1555 dump_ir_block_graph(irg, "-XXX");
1557 if (get_opt_alias_analysis()) {
1558 assure_irg_entity_usage_computed(irg);
1559 assure_irp_globals_entity_usage_computed();
1562 obstack_init(&env.obst);
1563 ir_nodemap_init(&env.adr_map);
1566 env.backward = NULL;
1567 env.curr_adr_id = 0;
1570 env.start_bl = get_irg_start_block(irg);
1571 env.end_bl = get_irg_end_block(irg);
1574 assure_irg_outs(irg);
1576 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1578 /* first step: allocate block entries: produces an
1579 inverse post-order list for the CFG */
1580 set_irn_link(env.end_bl, NULL);
1581 irg_out_block_walk(get_irg_start_block(irg), NULL, prepare_blocks, NULL);
1583 if (get_block_entry(env.end_bl) == NULL) {
1585 * The end block is NOT reachable due to endless loops
1586 * or no_return calls. Ensure that it is initialized.
1587 * Note that this places the entry for the end block first, so we must fix this.
1588 * env.backward points to the last block for this purpose.
1590 prepare_blocks(env.end_bl, NULL);
1593 env.forward = bl->forward_next;
1594 bl->forward_next = NULL;
1596 env.backward->forward_next = bl;
1599 /* second step: find and sort all memory ops */
1600 walk_memory_irg(irg, collect_memops, NULL, NULL);
1602 if (env.n_mem_ops == 0) {
1607 /* create the backward links */
1608 env.backward = NULL;
1609 irg_block_walk_graph(irg, NULL, collect_backward, NULL);
1611 /* link the end block in */
1612 bl = get_block_entry(env.end_bl);
1613 bl->backward_next = env.backward;
1616 /* check that we really start with the start / end block */
1617 assert(env.forward->block == env.start_bl);
1618 assert(env.backward->block == env.end_bl);
1620 /* create address sets: we know that 2 * n_mem_ops - 1 is an upper bound for all possible addresses */
1621 env.rbs_size = 2 * env.n_mem_ops;
1623 /* create the current set */
1624 env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
1625 rbitset_set(env.curr_set, env.rbs_size - 1);
1627 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
1628 /* set sentinel bits */
1629 bl->avail_out = rbitset_obstack_alloc(&env.obst, env.rbs_size);
1630 rbitset_set(bl->avail_out, env.rbs_size - 1);
1632 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
1633 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop[0]));
1635 bl->anticL_in = rbitset_obstack_alloc(&env.obst, env.rbs_size);
1636 rbitset_set(bl->anticL_in, env.rbs_size - 1);
1638 bl->id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
1639 memset(bl->id_2_memop, 0, env.rbs_size * sizeof(bl->id_2_memop[0]));
1642 // dump_block_list(&env);
1647 insert_Loads_upwards();
1650 /* over all blocks in reverse post order */
1651 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
1652 do_replacements(bl);
1655 /* not only invalidate but free them. We might have allocated new out arrays
1656 on our obstack which will be deleted anyway. */
1658 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
1662 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1663 ir_nodemap_destroy(&env.adr_map);
1664 obstack_free(&env.obst, NULL);
1666 current_ir_graph = rem;
1668 return env.changed != 0;