/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @brief   Dataflow driven Load/Store optimizations, uses some ideas from
 * @author  Michael Beck
 */
#include "irnodemap.h"
#include "raw_bitset.h"
/**
 * Mapping an address to a dense ID.
 */
typedef struct address_entry_t {
	unsigned id;          /**< The ID */
} address_entry;
enum {
	FLAG_KILL_LOADS  =  1, /**< KILL all Loads */
	FLAG_KILL_STORES =  2, /**< KILL all Stores */
	FLAG_KILLED_NODE =  4, /**< this node was killed */
	FLAG_EXCEPTION   =  8, /**< this node has exception flow */
	FLAG_IGNORE      = 16, /**< ignore this node (volatile or other) */
	/** this memop KILLS all addresses */
	FLAG_KILL_ALL    = FLAG_KILL_LOADS|FLAG_KILL_STORES
};
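
/*
 * Illustrative sketch (not part of the original sources): how these flags
 * are combined and tested.  FLAG_KILL_ALL is the union of both KILL bits,
 * so a memop that clobbers arbitrary memory is detected with one mask:
 *
 *   memop_t *op = ...;
 *   op->flags |= FLAG_KILL_ALL;                  // e.g. a Call with unknown effects
 *   if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL) {
 *       // kills both Loads and Stores
 *   } else if (op->flags & FLAG_KILL_LOADS) {
 *       // kills Loads only
 *   }
 */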
/**
 * A value: This represents a value stored at a given address in
 * memory. Do not confuse with values from value numbering.
 */
typedef struct value_t value_t;
struct value_t {
	ir_node  *address;    /**< the address of this value */
	ir_node  *value;      /**< the value itself */
	ir_mode  *mode;       /**< the mode of the value */
	unsigned id;          /**< address id */
};
/**
 * A memop describes a memory-related operation.
 * These are Loads/Stores and all other ops that might modify
 * memory (Calls, CopyB) or cause exceptions.
 */
typedef struct memop_t memop_t;
struct memop_t {
	value_t  value;      /**< the value of this memop: only defined for Load/Store */
	ir_node  *node;      /**< the memory op itself */
	ir_node  *mem;       /**< the memory FROM this node */
	ir_node  *replace;   /**< the replacement node if this memop is replaced */
	memop_t  *next;      /**< links to the next memory op in the block in forward order */
	memop_t  *prev;      /**< links to the previous memory op in the block in forward order */
	unsigned flags;      /**< memop flags */
};
/**
 * Additional data for every basic block.
 */
typedef struct block_t block_t;
struct block_t {
	memop_t  *memop_forward;     /**< topologically sorted list of memory ops in this block */
	memop_t  *memop_backward;    /**< last memop in the list */
	unsigned *avail_out;         /**< out-set of available addresses */
	memop_t  **id_2_memop_avail; /**< maps avail address ids to memops */
	unsigned *anticL_in;         /**< in-set of anticipated Load addresses */
	memop_t  **id_2_memop_antic; /**< maps anticipated address ids to memops */
	ir_node  *block;             /**< the associated block */
	block_t  *forward_next;      /**< next block entry for forward iteration */
	block_t  *backward_next;     /**< next block entry for backward iteration */
	memop_t  *avail;             /**< used locally for the avail map */
};
/**
 * Metadata for this pass.
 */
typedef struct ldst_env_t {
	struct obstack obst;              /**< obstack for temporary data */
	ir_nodemap_t   adr_map;           /**< maps addresses to address_entry */
	block_t        *forward;          /**< inverse post-order list of all blocks Start->End */
	block_t        *backward;         /**< inverse post-order list of all blocks End->Start */
	ir_node        *start_bl;         /**< start block of the current graph */
	ir_node        *end_bl;           /**< end block of the current graph */
	unsigned       *curr_set;         /**< current set of addresses */
	memop_t        **curr_id_2_memop; /**< current map of address ids to memops */
	unsigned       curr_adr_id;       /**< number for address mapping */
	unsigned       n_mem_ops;         /**< number of memory operations (Loads/Stores) */
	unsigned       rbs_size;          /**< size of all bitsets in bits */
	int            changed;           /**< flags for changed graph state */
} ldst_env;
static firm_dbg_module_t *dbg;

/* the one and only environment */
static ldst_env env;

#ifdef DEBUG_libfirm
/**
 * Dumps the block list.
 *
 * @param env  the environment
 */
static void dump_block_list(ldst_env *env) {
	block_t *entry;
	memop_t *op;

	for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
		DB((dbg, LEVEL_2, "%+F {", entry->block));

		for (op = entry->memop_forward; op != NULL; op = op->next) {
			DB((dbg, LEVEL_2, "\n\t"));
			DB((dbg, LEVEL_2, "%+F", op->node));
			if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
				DB((dbg, LEVEL_2, "X"));
			else if (op->flags & FLAG_KILL_LOADS)
				DB((dbg, LEVEL_2, "L"));
			else if (op->flags & FLAG_KILL_STORES)
				DB((dbg, LEVEL_2, "S"));
			DB((dbg, LEVEL_2, ", "));
		}
		DB((dbg, LEVEL_2, "\n}\n\n"));
	}
}
/**
 * Dumps the current set.
 *
 * @param bl  current block
 * @param s   name of the set
 */
static void dump_curr(block_t *bl, const char *s) {
	unsigned pos = 0;
	unsigned end = env.n_mem_ops * 2 - 1;

	DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = env.curr_id_2_memop[pos];

		DB((dbg, LEVEL_2, "\n\t"));
		DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
	}
	DB((dbg, LEVEL_2, "\n}\n"));
}
#else

#define dump_block_list()
#define dump_curr(bl, s)

#endif /* DEBUG_libfirm */
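
/*
 * A note on the iteration idiom used throughout this pass (illustrative,
 * derived from how the sets are built in opt_ldst() below): every bitset
 * is allocated with one extra bit at position env.rbs_size - 1, which is
 * permanently set and acts as a sentinel.  rbitset_next() therefore never
 * runs off the end; a scan is finished once it returns the sentinel
 * position `end`:
 *
 *   unsigned pos = 0;
 *   unsigned end = env.n_mem_ops * 2 - 1;   // position of the sentinel bit
 *
 *   for (pos = rbitset_next(set, pos, 1);
 *        pos != end;
 *        pos = rbitset_next(set, pos + 1, 1)) {
 *       // pos is the id of an address contained in the set
 *   }
 */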
/** Get the block entry for a block node */
static block_t *get_block_entry(const ir_node *block) {
	assert(is_Block(block));

	return get_irn_link(block);
}

/** Get the memop entry for a memory operation node */
static memop_t *get_irn_memop(const ir_node *irn) {
	assert(! is_Block(irn));
	return get_irn_link(irn);
}
/**
 * Walk over the memory edges from definition to users.
 *
 * @param irn   start node
 * @param pre   pre walker function
 * @param post  post walker function
 * @param ctx   context parameter for the walker functions
 */
static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
	int     i;
	ir_mode *mode;

	mark_irn_visited(irn);

	if (pre)
		pre(irn, ctx);

	mode = get_irn_mode(irn);
	if (mode == mode_M) {
		/* every successor uses memory */
		for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
			ir_node *succ = get_irn_out(irn, i);

			if (! irn_visited(succ))
				walk_memory(succ, pre, post, ctx);
		}
	} else if (mode == mode_T) {
		/* only some Projs use memory */
		for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
			ir_node *proj = get_irn_out(irn, i);

			if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
				walk_memory(proj, pre, post, ctx);
		}
	}
	if (post)
		post(irn, ctx);
}
/**
 * Walks over all memory nodes of a graph.
 *
 * @param irg   a graph
 * @param pre   pre walker function
 * @param post  post walker function
 * @param ctx   context parameter for the walker functions
 */
static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
	inc_irg_visited(irg);

	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);

	/*
	 * There are two possible sources for memory: initial_mem and nomem.
	 * We ignore nomem, as it should NOT change the memory.
	 */
	walk_memory(get_irg_initial_mem(irg), pre, post, ctx);

	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
}
/**
 * Block walker: allocate a block entry for every block.
 */
static void prepare_blocks(ir_node *block, void *ctx) {
	block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));

	entry->memop_forward    = NULL;
	entry->memop_backward   = NULL;
	entry->avail_out        = NULL;
	entry->id_2_memop_avail = NULL;
	entry->anticL_in        = NULL;
	entry->id_2_memop_antic = NULL;
	entry->block            = block;
	entry->forward_next     = env.forward;
	entry->backward_next    = NULL;
	entry->avail            = NULL;

	set_irn_link(block, entry);

	/* create the list in inverse order */
	env.forward = entry;
	/* temporarily remember the last one */
	if (env.backward == NULL)
		env.backward = entry;
}
/**
 * Block walker: create backward links for the memops of a block.
 */
static void collect_backward(ir_node *block, void *ctx) {
	block_t *entry = get_block_entry(block);
	memop_t *last, *op;

	/*
	 * Do NOT link in the end block yet. We want it to be
	 * the first in the list. This is NOT guaranteed by the walker
	 * if we have endless loops.
	 */
	if (block != env.end_bl) {
		entry->backward_next = env.backward;

		/* create the list in inverse order */
		env.backward = entry;
	}

	/* create backward links for all memory ops */
	last = NULL;
	for (op = entry->memop_forward; op != NULL; op = op->next) {
		op->prev = last;
		last     = op;
	}
	entry->memop_backward = last;
}
/**
 * Allocate a memop.
 *
 * @param irn  the IR-node representing the memop
 */
static memop_t *alloc_memop(ir_node *irn) {
	memop_t *m = obstack_alloc(&env.obst, sizeof(*m));

	m->value.address = NULL;
	m->value.value   = NULL;
	m->value.mode    = NULL;
	m->node          = irn;

	set_irn_link(irn, m);
	return m;
}
/**
 * Create a memop for a Phi-replacement.
 *
 * @param op   the memop to clone
 * @param phi  the Phi-node representing the new value
 */
static memop_t *clone_memop_phi(memop_t *op, ir_node *phi) {
	memop_t *m = obstack_alloc(&env.obst, sizeof(*m));

	m->value       = op->value;
	m->value.value = phi;
	m->node        = phi;

	set_irn_link(phi, m);
	return m;
}
/**
 * Register an address and allocate an ID for it.
 *
 * @param adr  the IR-node representing the address
 */
static unsigned register_address(ir_node *adr) {
	address_entry *entry;

	/* skip Confirms and Casts */
restart:
	if (is_Confirm(adr)) {
		adr = get_Confirm_value(adr);
		goto restart;
	}
	if (is_Cast(adr)) {
		adr = get_Cast_op(adr);
		goto restart;
	}

	entry = ir_nodemap_get(&env.adr_map, adr);

	if (entry == NULL) {
		/* new address */
		entry = obstack_alloc(&env.obst, sizeof(*entry));

		entry->id = env.curr_adr_id++;
		ir_nodemap_insert(&env.adr_map, adr, entry);
	}
	return entry->id;
}
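
/*
 * Illustrative sketch (not from the original sources): two memory ops
 * whose pointers differ only by a Confirm (or Cast) are mapped to the
 * same dense ID, so they share the same slot in all bitsets:
 *
 *   ir_node *p  = ...;   // some pointer
 *   ir_node *cp = ...;   // a Confirm of p
 *   assert(register_address(p) == register_address(cp));
 */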
/**
 * Return the memory properties of a call node.
 *
 * @param call  the call node
 *
 * @return a bitset of mtp_property_const and mtp_property_pure
 */
static unsigned get_Call_memory_properties(ir_node *call) {
	ir_type *call_tp = get_Call_type(call);
	unsigned prop    = get_method_additional_properties(call_tp);

	/* check first the call type */
	if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
		/* try the called entity */
		ir_node *ptr = get_Call_ptr(call);

		if (is_Global(ptr)) {
			ir_entity *ent = get_Global_entity(ptr);

			prop = get_entity_additional_properties(ent);
		}
	}
	return prop & (mtp_property_const|mtp_property_pure);
}
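
/*
 * Illustrative sketch (not from the original sources): how the two
 * properties are interpreted in update_Call_memop() below.  A const call
 * neither reads nor writes memory, a pure call only reads:
 *
 *   unsigned prop = get_Call_memory_properties(call);
 *   if (prop & mtp_property_const) {
 *       // no memory effect at all: the call kills nothing
 *   } else if (prop & mtp_property_pure) {
 *       // reads memory: Stores must not be moved across it
 *   } else {
 *       // may read and write: kills all tracked addresses
 *   }
 */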
/**
 * Update a memop for a Load.
 *
 * @param m  the memop
 */
static void update_Load_memop(memop_t *m) {
	int     i;
	ir_node *load = m->node;
	ir_node *adr  = get_Load_ptr(load);

	if (get_Load_volatility(load) == volatility_is_volatile)
		m->flags |= FLAG_IGNORE;

	m->value.address = adr;

	for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(load, i);

		/* beware of keep edges */
		if (is_End(proj))
			continue;

		switch (get_Proj_proj(proj)) {
		case pn_Load_res:
			m->value.value = proj;
			m->value.mode  = get_irn_mode(proj);
			break;
		case pn_Load_X_except:
			m->flags |= FLAG_EXCEPTION;
			break;
		case pn_Load_M:
			m->mem = proj;
			break;
		}
	}

	if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
		/* only create an address if this node is NOT killed immediately or ignored */
		m->value.id = register_address(adr);
		++env.n_mem_ops;
	} else {
		/* no user, KILL it */
		m->flags |= FLAG_KILLED_NODE;
	}
}
/**
 * Update a memop for a Store.
 *
 * @param m  the memop
 */
static void update_Store_memop(memop_t *m) {
	int     i;
	ir_node *store = m->node;
	ir_node *adr   = get_Store_ptr(store);

	if (get_Store_volatility(store) == volatility_is_volatile) {
		m->flags |= FLAG_IGNORE;
	} else {
		/* only create an address if this node is NOT ignored */
		m->value.id = register_address(adr);
		++env.n_mem_ops;
	}

	m->value.address = adr;

	for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(store, i);

		/* beware of keep edges */
		if (is_End(proj))
			continue;

		switch (get_Proj_proj(proj)) {
		case pn_Store_X_except:
			m->flags |= FLAG_EXCEPTION;
			break;
		case pn_Store_M:
			m->mem = proj;
			break;
		}
	}

	m->value.value = get_Store_value(store);
	m->value.mode  = get_irn_mode(m->value.value);
}
/**
 * Update a memop for a Call.
 *
 * @param m  the memop
 */
static void update_Call_memop(memop_t *m) {
	int      i;
	ir_node  *call = m->node;
	unsigned prop  = get_Call_memory_properties(call);

	if (prop & mtp_property_const) {
		/* A constant call does NOT use memory at all; we
		   can kick it from the list. */
	} else if (prop & mtp_property_pure) {
		/* pure calls READ memory */
		m->flags = FLAG_KILL_STORES;
	} else {
		m->flags = FLAG_KILL_ALL;
	}

	for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(call, i);

		/* beware of keep edges */
		if (is_End(proj))
			continue;

		switch (get_Proj_proj(proj)) {
		case pn_Call_X_except:
			m->flags |= FLAG_EXCEPTION;
			break;
		case pn_Call_M_regular:
			m->mem = proj;
			break;
		}
	}
}
/**
 * Update a memop for a Div/Mod/Quot/DivMod.
 *
 * @param m  the memop
 */
static void update_DivOp_memop(memop_t *m) {
	int     i;
	ir_node *div = m->node;

	for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(div, i);

		/* beware of keep edges */
		if (is_End(proj))
			continue;

		switch (get_Proj_proj(proj)) {
		case pn_Generic_X_except:
			m->flags |= FLAG_EXCEPTION;
			break;
		case pn_Generic_M_regular:
			m->mem = proj;
			break;
		}
	}
}
/**
 * Update a memop for a Phi.
 *
 * @param m  the memop
 */
static void update_Phi_memop(memop_t *m) {
	/* the Phi is its own mem */
	m->mem = m->node;
}
/**
 * Memory walker: collect all memory ops and build topological lists.
 */
static void collect_memops(ir_node *irn, void *ctx) {
	memop_t *op;
	ir_node *block;
	block_t *entry;

	if (is_Proj(irn)) {
		/* we can safely ignore ProjM's except the initial memory */
		if (irn != get_irg_initial_mem(current_ir_graph))
			return;
	}

	op    = alloc_memop(irn);
	block = get_nodes_block(irn);
	entry = get_block_entry(block);

	if (is_Phi(irn)) {
		update_Phi_memop(op);
		/* Phis must be always placed first */
		op->next = entry->memop_forward;
		entry->memop_forward = op;
		if (entry->memop_backward == NULL)
			entry->memop_backward = op;
	} else {
		switch (get_irn_opcode(irn)) {
		case iro_Load:
			update_Load_memop(op);
			break;
		case iro_Store:
			update_Store_memop(op);
			break;
		case iro_Call:
			update_Call_memop(op);
			break;
		case iro_Div:
		case iro_DivMod:
		case iro_Quot:
		case iro_Mod:
			/* we can use those to find the memory edge */
			update_DivOp_memop(op);
			break;
		/* TODO: handle some builtins */
		default:
			/* unsupported operation */
			op->flags = FLAG_KILL_ALL;
		}

		/* all others should be placed last */
		if (entry->memop_backward == NULL) {
			entry->memop_forward = entry->memop_backward = op;
		} else {
			entry->memop_backward->next = op;
			entry->memop_backward = op;
		}
	}
}
/**
 * Find an address in the current set.
 *
 * @param bl     the block
 * @param value  the value to be searched for
 */
static memop_t *find_address(const block_t *bl, const value_t *value) {
	if (rbitset_is_set(env.curr_set, value->id)) {
		memop_t *res = env.curr_id_2_memop[value->id];

		if (res->value.mode == value->mode)
			return res;
		/* allow hidden casts */
		if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
		    get_mode_arithmetic(value->mode) == irma_twos_complement &&
		    get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
			return res;
	}
	return NULL;
}
/**
 * Find an address in the avail_out set.
 *
 * @param bl     the block
 * @param value  the value to be searched for
 */
static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
	if (rbitset_is_set(bl->avail_out, value->id)) {
		memop_t *res = bl->id_2_memop_avail[value->id];

		if (res->value.mode == value->mode)
			return res;
		/* allow hidden casts */
		if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
		    get_mode_arithmetic(value->mode) == irma_twos_complement &&
		    get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
			return res;
	}
	return NULL;
}
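
/*
 * Illustrative example (not from the original sources) of the "hidden
 * cast" rule above: a value stored as 32-bit signed int may satisfy a
 * Load of 32-bit unsigned int, because both modes use two's complement
 * arithmetic and have the same bit size, so the raw bits are identical:
 *
 *   // stored value has mode_Is (32-bit signed),
 *   // loaded value wants mode_Iu (32-bit unsigned):
 *   // both find_address() variants treat these as compatible; a Conv is
 *   // materialized later by conv_to() if the user needs the other mode.
 */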
/**
 * Kill all Loads from the current set.
 */
static void kill_all_loads(void) {
	unsigned pos = 0;
	unsigned end = env.n_mem_ops * 2 - 1;

	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = env.curr_id_2_memop[pos];

		if (! is_Store(op->node))
			rbitset_clear(env.curr_set, pos);
	}
}
/**
 * Kill all Stores from the current set.
 */
static void kill_all_stores(void) {
	unsigned pos = 0;
	unsigned end = env.n_mem_ops * 2 - 1;

	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = env.curr_id_2_memop[pos];

		if (is_Store(op->node))
			rbitset_clear(env.curr_set, pos);
	}
}
/**
 * Kill all addresses from the current set.
 */
static void kill_all(void) {
	rbitset_clear_all(env.curr_set, env.rbs_size);

	/* set the sentinel */
	rbitset_set(env.curr_set, env.rbs_size - 1);
}
/**
 * Kill Stores that are not alias free due to a Load value from the current set.
 *
 * @param value  the Load value
 */
static void kill_stores(const value_t *value) {
	unsigned pos = 0;
	unsigned end = env.n_mem_ops * 2 - 1;

	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = env.curr_id_2_memop[pos];

		if (is_Store(op->node)) {
			if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
			                                      op->value.address, op->value.mode)) {
				rbitset_clear(env.curr_set, pos);
				env.curr_id_2_memop[pos] = NULL;
			}
		}
	}
}
/**
 * Kill memops that are not alias free due to a Store value from the current set.
 *
 * @param value  the Store value
 */
static void kill_memops(const value_t *value) {
	unsigned pos = 0;
	unsigned end = env.n_mem_ops * 2 - 1;

	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = env.curr_id_2_memop[pos];

		if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
		                                      op->value.address, op->value.mode)) {
			rbitset_clear(env.curr_set, pos);
			env.curr_id_2_memop[pos] = NULL;
		}
	}
}
/**
 * Add the value of a memop to the current set.
 *
 * @param op  the memory op
 */
static void add_memop(memop_t *op) {
	rbitset_set(env.curr_set, op->value.id);
	env.curr_id_2_memop[op->value.id] = op;
}
/**
 * Add the value of a memop to the avail_out set.
 *
 * @param bl  the block
 * @param op  the memory op
 */
static void add_memop_avail(block_t *bl, memop_t *op) {
	rbitset_set(bl->avail_out, op->value.id);
	bl->id_2_memop_avail[op->value.id] = op;
}
/**
 * Update a value of a memop in the avail_out set.
 *
 * @param bl  the block
 * @param op  the memory op
 */
static void update_memop_avail(block_t *bl, memop_t *op) {
	if (rbitset_is_set(bl->avail_out, op->value.id))
		bl->id_2_memop_avail[op->value.id] = op;
}
/**
 * Add a Conv if needed.
 */
static ir_node *conv_to(ir_node *irn, ir_mode *mode) {
	if (get_irn_mode(irn) != mode) {
		ir_node *block = get_nodes_block(irn);
		return new_r_Conv(current_ir_graph, block, irn, mode);
	}
	return irn;
}
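
/*
 * Illustrative usage (not from the original sources): when a Load of
 * mode_Iu is satisfied by a stored mode_Is value (the hidden-cast case in
 * find_address()), conv_to() materializes the mode change:
 *
 *   ir_node *fix = conv_to(stored_value, mode_Iu);
 *   // fix == stored_value if the mode already matched, otherwise a
 *   // newly created Conv node in stored_value's block
 */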
/**
 * Build a phi definition for a value in the given block.
 *
 * @param block  the block
 * @param value  the value
 *
 * @return the definition
 */
static ir_node *build_phi_definition(ir_node *block, const value_t *value) {
	ir_node **in, *phi;
	int     i, n;
	memop_t *mop;
	ir_mode *mode = value->mode;

	/* no: we need a new Phi */
	n = get_Block_n_cfgpreds(block);

	while (n == 1) {
		block = get_Block_cfgpred_block(block, 0);
		n     = get_Block_n_cfgpreds(block);
		mop   = find_address_avail(get_block_entry(block), value);

		if (mop == NULL) {
			/* this can only happen if modes cannot be converted */
			return NULL;
		}
		if (mop->value.value != NULL)
			return conv_to(mop->value.value, mode);
	}

	NEW_ARR_A(ir_node *, in, n);
	for (i = n - 1; i >= 0; --i) {
		ir_node *pred_bl = get_Block_cfgpred_block(block, i);
		ir_node *def_bl;

		mop = find_address_avail(get_block_entry(pred_bl), value);

		def_bl = get_nodes_block(mop->value.value);

		if (block_dominates(def_bl, pred_bl))
			in[i] = conv_to(mop->value.value, mode);
		else {
			ir_node *def = build_phi_definition(pred_bl, &mop->value);

			in[i] = conv_to(def, mode);
		}
	}

	phi = new_r_Phi(current_ir_graph, block, n, in, mode);
	DB((dbg, LEVEL_2, "Created new Phi %+F for value %+F in block %+F\n", phi, value->value, block));
	return phi;
}
/**
 * Find a definition for a value in the given block or above.
 *
 * @param block  the block
 * @param op     the memop: the definition must dominate it
 * @param value  the value
 *
 * @return the definition or NULL if modes are different
 */
static ir_node *find_definition(ir_node *block, const memop_t *op, const value_t *value) {
	ir_node *def_block = get_nodes_block(value->value);

	if (def_block == block) {
		/* the value is in our block: check, if it is above in the memory list */
		const memop_t *p;

		for (p = op->prev; p != NULL; p = p->prev) {
			if (!(p->flags & FLAG_KILLED_NODE) &&
			    p->value.address == value->address) {
				/* same address */
				assert(p->value.value == value->value);
				return p->value.value;
			}
		}
	} else if (block_dominates(def_block, block)) {
		/* strictly dominates */
		return value->value;
	}

	return build_phi_definition(block, value);
}
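
/*
 * Illustrative example (not from the original sources): in a diamond CFG
 *
 *        A
 *       / \
 *      B   C        B: Store(p, x),  C: Store(p, y)
 *       \ /
 *        D          D: Load(p)
 *
 * the Load's value is available on both paths into D but defined by
 * different nodes, so find_definition() falls back to
 * build_phi_definition(), which creates Phi(x, y) in D and returns it as
 * the replacement for the Load result.
 */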
/**
 * Mark a Load memop to be replaced by a definition.
 *
 * @param op   the Load memop
 * @param def  the definition
 */
static void mark_replace_load(memop_t *op, ir_node *def) {
	op->replace = def;
	op->flags |= FLAG_KILLED_NODE;
	env.changed = 1;
}
/**
 * Mark a Store memop to be removed.
 *
 * @param op  the Store memop
 */
static void mark_remove_store(memop_t *op) {
	op->flags |= FLAG_KILLED_NODE;
	env.changed = 1;
}
/**
 * Do forward dataflow analysis on the given block and calculate the
 * GEN and KILL in the current (avail) set.
 *
 * @param bl  the block
 */
static void calc_gen_kill_avail(block_t *bl) {
	memop_t *op;
	ir_node *def;

	for (op = bl->memop_forward; op != NULL; op = op->next) {
		switch (get_irn_opcode(op->node)) {
		case iro_Load:
			if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
				/* do we have this already? */
				memop_t *other = find_address(bl, &op->value);
				if (other != NULL) {
					def = find_definition(bl->block, op, &other->value);
					if (def != NULL) {
						if (is_Store(other->node)) {
							/* RAW */
							DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
						} else {
							/* RAR */
							DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
						}
						mark_replace_load(op, def);
					}
				} else {
					kill_stores(&op->value);
					add_memop(op);
				}
			}
			break;
		case iro_Store:
			if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
				/* do we have this store already */
				memop_t *other = find_address(bl, &op->value);
				if (other != NULL) {
					if (is_Store(other->node)) {
						if (op != other && get_nodes_block(other->node) == get_nodes_block(op->node)) {
							/*
							 * For a WAW in the same block we can kick the first store.
							 * This is a shortcut: we know that the second Store will be anticipated.
							 */
							DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", op->node, other->node));
							mark_remove_store(other);
							/* FIXME: a Load might get freed due to this killed store */
						}
					} else if (other->value.value == op->value.value) {
						/* WAR */
						DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
						mark_remove_store(op);
					} else {
						/* we overwrite the value that was loaded */
						add_memop(op);
					}
				} else {
					/* add this value */
					kill_memops(&op->value);
					add_memop(op);
				}
			}
			break;
		default:
			switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
			case FLAG_KILL_LOADS|FLAG_KILL_STORES:
				kill_all();
				break;
			case FLAG_KILL_LOADS:
				kill_all_loads();
				break;
			case FLAG_KILL_STORES:
				kill_all_stores();
				break;
			}
		}
	}
}
#define BYTE_SIZE(x) (((x) + 7) >> 3)
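
/*
 * Illustrative note (not from the original sources): BYTE_SIZE rounds a
 * bit count up to whole bytes, e.g. for sizing raw bitsets:
 *
 *   BYTE_SIZE(1)  == 1     // one bit still needs a whole byte
 *   BYTE_SIZE(8)  == 1
 *   BYTE_SIZE(9)  == 2     // (9 + 7) >> 3 == 16 >> 3 == 2
 *   BYTE_SIZE(17) == 3
 */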
/**
 * Do forward dataflow analysis on a given block to calculate the avail_out set
 * for this block only.
 *
 * @param bl  the block
 */
static void forward_avail(block_t *bl) {
	/* fill the data from the current block */
	env.curr_id_2_memop = bl->id_2_memop_avail;
	env.curr_set        = bl->avail_out;

	calc_gen_kill_avail(bl);
	dump_curr(bl, "Avail_out");
}
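
/*
 * Sketch of the forward dataflow equations being solved (my reading of
 * calcAvail()/insert_Load() below, not a quote from the sources): for a
 * block B with predecessors P_1..P_n,
 *
 *   Avail_in(B)  = Avail_out(P_1) & ... & Avail_out(P_n)   // set intersection
 *   Avail_out(B) = GEN(B) | (Avail_in(B) & ~KILL(B))
 *
 * where GEN/KILL are produced by calc_gen_kill_avail(): Loads and Stores
 * generate their address, aliasing Stores and clobbering calls kill
 * entries.  The sets shrink monotonically under intersection, so the
 * fixpoint iteration terminates.
 */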
/**
 * Do backward dataflow analysis on a given block to calculate the antic set
 * of Loaded addresses.
 *
 * @param bl  the block
 *
 * @return non-zero if the set has changed since last iteration
 */
static int backward_antic(block_t *bl) {
	memop_t  *op;
	unsigned pos = 0;
	unsigned end = env.n_mem_ops * 2 - 1;
	int      n   = get_Block_n_cfg_outs(bl->block);

	if (n >= 1) {
		ir_node *succ    = get_Block_cfg_out(bl->block, 0);
		block_t *succ_bl = get_block_entry(succ);
		int     i;

		rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
		memcpy(env.curr_id_2_memop, succ_bl->id_2_memop_antic, env.rbs_size * sizeof(env.curr_id_2_memop[0]));

		for (i = n - 1; i > 0; --i) {
			ir_node *succ    = get_Block_cfg_out(bl->block, i);
			block_t *succ_bl = get_block_entry(succ);

			rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
		}
	} else {
		/* block ends with a noreturn call */
		kill_all();
	}

	/* cleanup: kill those Loads whose address is not available */
	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op     = env.curr_id_2_memop[pos];
		ir_node *ptr    = get_Load_ptr(op->node);
		ir_node *ptr_bl = get_nodes_block(ptr);

		if (!block_dominates(ptr_bl, bl->block))
			rbitset_clear(env.curr_set, pos);
	}

	dump_curr(bl, "AnticL_out");

	for (op = bl->memop_backward; op != NULL; op = op->prev) {
		switch (get_irn_opcode(op->node)) {
		case iro_Load:
			if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
				add_memop(op);
			}
			break;
		case iro_Store:
			if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
				/* a Store: check which memops must be killed */
				kill_memops(&op->value);
			}
			break;
		default:
			switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
			case FLAG_KILL_LOADS|FLAG_KILL_STORES:
				kill_all();
				break;
			case FLAG_KILL_LOADS:
				kill_all_loads();
				break;
			case FLAG_KILL_STORES:
				/*kill_all_stores();*/
				break;
			}
		}
	}

	memcpy(bl->id_2_memop_antic, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
	if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
		/* changed */
		rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
		dump_curr(bl, "AnticL_in*");
		return 1;
	}
	dump_curr(bl, "AnticL_in");
	return 0;
}
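
/*
 * Sketch of the backward dataflow equations (my reading of
 * backward_antic(), not a quote from the sources): for a block B with
 * successors S_1..S_n,
 *
 *   AnticL_out(B) = AnticL_in(S_1) & ... & AnticL_in(S_n)
 *   AnticL_in(B)  = GEN_Load(B) | (AnticL_out(B) & ~KILL(B))
 *
 * computed by walking the memops of B backwards: Loads add their address,
 * Stores and clobbering ops kill aliasing entries.  calcAntic() repeats
 * the walk until no AnticL_in set changes.
 */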
/**
 * Replace a Load memop by an already known value.
 *
 * @param op  the Load memop
 */
static void replace_load(memop_t *op) {
	ir_node *load = op->node;
	ir_node *def  = skip_Id(op->replace);
	int     i;
	ir_mode *mode;

	if (def != NULL)
		DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
	else {
		if (op->flags & FLAG_EXCEPTION) {
			/* bad: this node is unused and executed for exception only */
			DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
			return;
		}
		DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
	}

	for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(load, i);

		switch (get_Proj_proj(proj)) {
		case pn_Load_M:
			exchange(proj, get_Load_mem(load));
			break;
		case pn_Load_res:
			mode = get_irn_mode(proj);
			if (get_irn_mode(def) != mode) {
				/* a hidden cast */
				dbg_info *db    = get_irn_dbg_info(load);
				ir_node  *block = get_nodes_block(proj);
				def = new_rd_Conv(db, current_ir_graph, block, def, mode);
			}
			exchange(proj, def);
			break;
		case pn_Load_X_except:
			exchange(proj, new_Bad());
			break;
		case pn_Load_X_regular:
			exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
			break;
		default:
			panic("Unknown Proj from Load");
		}
	}
}
/**
 * Remove a Store memop.
 *
 * @param op  the Store memop
 */
static void remove_store(memop_t *op) {
	ir_node *store = op->node;
	int     i;

	DB((dbg, LEVEL_1, "Removing %+F\n", store));

	for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(store, i);

		switch (get_Proj_proj(proj)) {
		case pn_Store_M:
			exchange(proj, get_Store_mem(store));
			break;
		case pn_Store_X_except:
			exchange(proj, new_Bad());
			break;
		case pn_Store_X_regular:
			exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
			break;
		default:
			panic("Unknown Proj from Store");
		}
	}
}
/**
 * Do all necessary replacements for a given block.
 *
 * @param bl  the block
 */
static void do_replacements(block_t *bl) {
	memop_t *op;

	for (op = bl->memop_forward; op != NULL; op = op->next) {
		if (op->flags & FLAG_KILLED_NODE) {
			switch (get_irn_opcode(op->node)) {
			case iro_Load:
				replace_load(op);
				break;
			case iro_Store:
				remove_store(op);
				break;
			}
		}
	}
}
/**
 * Calculate the Avail_out sets for all basic blocks.
 */
static void calcAvail(void) {
	memop_t **tmp_memop = env.curr_id_2_memop;
	unsigned *tmp_set   = env.curr_set;
	block_t  *bl;

	/* calculate avail_out */
	DB((dbg, LEVEL_2, "Calculate Avail_out\n"));

	/* iterate over all blocks in any order, skip the start block */
	for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
		forward_avail(bl);
	}

	/* restore the current sets */
	env.curr_id_2_memop = tmp_memop;
	env.curr_set        = tmp_set;
}
/**
 * Calculate the Antic_in sets for all basic blocks.
 */
static void calcAntic(void) {
	int i, need_iter;

	/* calculate antic_out */
	DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
	i = 0;
	do {
		block_t *bl;

		DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));

		need_iter = 0;

		/* over all blocks in reverse post order */
		for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
			need_iter |= backward_antic(bl);
		}
		++i;
	} while (need_iter);
	DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
}
/**
 * Return the node representing the first memory in a block.
 *
 * @param bl  the block
 */
static ir_node *find_first_memory(block_t *bl) {
	for (;;) {
		if (bl->memop_forward != NULL) {
			return bl->memop_forward->node;
		}
		/* if there is NO memory in this block, go to the post dominator */
		bl = get_block_entry(get_Block_ipostdom(bl->block));
	}
}
/**
 * Return the node representing the last memory in a block.
 *
 * @param bl  the block
 */
static ir_node *find_last_memory(block_t *bl) {
	for (;;) {
		if (bl->memop_backward != NULL) {
			return bl->memop_backward->mem;
		}
		/* if there is NO memory in this block, go to the dominator */
		bl = get_block_entry(get_Block_idom(bl->block));
	}
}
/**
 * Reroute all memory users of old memory
 * to a new memory IR-node.
 *
 * @param omem  the old memory IR-node
 * @param nmem  the new memory IR-node
 */
static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
	int i;

	for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
		int     n_pos;
		ir_node *user = get_irn_out_ex(omem, i, &n_pos);

		set_irn_n(user, n_pos, nmem);
	}

	/* all edges previously pointing to omem now point to nmem */
	nmem->out = omem->out;
}
/**
 * Reroute memory users of old memory that are dominated by a given block
 * to a new memory IR-node.
 *
 * @param omem     the old memory IR-node
 * @param nmem     the new memory IR-node
 * @param pass_bl  the block the memory must pass
 */
static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
	int             i, j, n = get_irn_n_outs(omem);
	ir_def_use_edge *edges  = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);

	for (i = j = 0; i < n; ++i) {
		int     n_pos;
		ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
		ir_node *use_bl = get_nodes_block(user);

		if (is_Phi(user)) {
			use_bl = get_Block_cfgpred_block(use_bl, n_pos);
		}
		if (block_dominates(pass_bl, use_bl)) {
			/* found a user that is dominated */
			++j;
			edges[j].pos = n_pos;
			edges[j].use = user;

			set_irn_n(user, n_pos, nmem);
		}
	}

	/* Modify the out structure: we create a new out edge array on our
	   temporary obstack here. This should be no problem, as we invalidate
	   the edges at the end anyway. */
	/* first entry is used for the length */
	edges[0].pos = j;
	nmem->out = edges;
}
static int insert_Load(block_t *bl) {
	ir_node  *block = bl->block;
	int      i, n = get_Block_n_cfgpreds(block);
	int      pos;
	unsigned end = env.n_mem_ops * 2 - 1;
	int      res = 0;
	ir_node  *pred    = get_Block_cfgpred_block(bl->block, 0);
	block_t  *pred_bl = get_block_entry(pred);

	DB((dbg, LEVEL_3, "processing %+F\n", block));

	rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);

	if (n > 1) {
		/* more than one predecessor, calculate the join of all avail_outs */
		for (i = n - 1; i > 0; --i) {
			ir_node *pred    = skip_Proj(get_Block_cfgpred(block, i));
			block_t *pred_bl = get_block_entry(get_nodes_block(pred));

			rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);

			if (is_Load(pred) || is_Store(pred)) {
				/* We reached this block by an exception from a Load or Store:
				 * the memop creating the exception was NOT completed then, kill it
				 */
				memop_t *exc_op = get_irn_memop(pred);
				rbitset_clear(env.curr_set, exc_op->value.id);
			}
		}

		/* ensure that all values are in the map */
		for (pos = env.rbs_size - 1; pos >= 0; --pos) {
			if (! rbitset_is_set(env.curr_set, pos)) {
				env.curr_id_2_memop[pos] = NULL;
			} else {
				for (i = n - 1; i >= 0; --i) {
					ir_node *pred    = get_Block_cfgpred_block(bl->block, i);
					block_t *pred_bl = get_block_entry(pred);

					if (pred_bl->id_2_memop_avail[pos] != NULL) {
						env.curr_id_2_memop[pos] = pred_bl->id_2_memop_avail[pos];
						break;
					}
				}
			}
		}
	} else {
		/* only one predecessor, simply copy the map */
		memcpy(env.curr_id_2_memop, pred_bl->id_2_memop_avail, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));
	}

	/* recalculate avail by gen and kill */
	calc_gen_kill_avail(bl);

	if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
		/* the avail set has changed */
		rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
		memcpy(bl->id_2_memop_avail, env.curr_id_2_memop, env.rbs_size * sizeof(env.curr_id_2_memop[0]));
		dump_curr(bl, "Avail_out*");
		res = 1;
	}

	if (n > 1) {
		pos = 0;

		/* check for partly redundant values */
		for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
			memop_t *op = bl->id_2_memop_antic[pos];
			int     have_some, all_same;
			ir_node *first;

			assert(is_Load(op->node));

			if (op->flags & FLAG_KILLED_NODE)
				continue;

			DB((dbg, LEVEL_3, "anticipated %+F\n", op->node));

			have_some = 0;
			all_same  = 1;
			first     = NULL;
			for (i = n - 1; i >= 0; --i) {
				ir_node *pred    = get_Block_cfgpred_block(block, i);
				block_t *pred_bl = get_block_entry(pred);
				memop_t *e       = find_address_avail(pred_bl, &op->value);

				if (e == NULL) {
					ir_node *block = get_nodes_block(op->value.address);

					if (! block_dominates(block, pred)) {
						/* cannot place a copy here */
						have_some = 0;
						break;
					}
					DB((dbg, LEVEL_3, "%+F is not available in predecessor %+F\n", op->node, pred));
					pred_bl->avail = NULL;
					all_same = 0;
				} else {
					pred_bl->avail = e;
					have_some      = 1;
					DB((dbg, LEVEL_3, "%+F is available for %+F in predecessor %+F\n", e->node, op->node, pred));
					if (first == NULL)
						first = e->node;
					else if (first != e->node)
						all_same = 0;
				}
			}

			if (have_some && !all_same) {
				ir_mode *mode = op->value.mode;
				ir_node **in, *phi;

				NEW_ARR_A(ir_node *, in, n);

				for (i = n - 1; i >= 0; --i) {
					ir_node *pred    = get_Block_cfgpred_block(block, i);
					block_t *pred_bl = get_block_entry(pred);

					if (pred_bl->avail == NULL) {
						/* create a new Load here to make it fully redundant */
						dbg_info *db       = get_irn_dbg_info(op->node);
						ir_node  *last_mem = find_last_memory(pred_bl);
						ir_node  *load, *def;
						memop_t  *new_op;

						assert(last_mem != NULL);
						load = new_rd_Load(db, current_ir_graph, pred, last_mem, op->value.address, mode, cons_none);
						def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
						DB((dbg, LEVEL_1, "Created new %+F in %+F for partly redundant %+F\n", load, pred, op->node));

						new_op                = alloc_memop(load);
						new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
						new_op->value.address = op->value.address;
						new_op->value.id      = op->value.id;
						new_op->value.mode    = mode;
						new_op->value.value   = def;

						new_op->prev            = pred_bl->memop_backward;
						pred_bl->memop_backward = new_op;

						if (get_nodes_block(last_mem) == pred) {
							/* We have added a new last memory op in pred block.
							   If pred already had a last mem, reroute all memory
							   users. */
							reroute_all_mem_users(last_mem, new_op->mem);
						} else {
							/* reroute only the memory users going through the pred block */
							reroute_mem_through(last_mem, new_op->mem, pred);
						}

						/* we added this load at the end, so it will be avail anyway */
						add_memop_avail(pred_bl, new_op);
						pred_bl->avail = new_op;
					}
					in[i] = pred_bl->avail->value.value;
				}
				phi = new_r_Phi(current_ir_graph, block, n, in, mode);
				DB((dbg, LEVEL_1, "Created new %+F in %+F for now redundant %+F\n", phi, block, op->node));

				if (get_nodes_block(op->node) == block) {
					/* The redundant node was in the current block:
					   In that case, DO NOT update avail_out. If it was NOT
					   avail although it is executed in this block, it is killed by a later
					   instruction. */
					memop_t *phi_op = clone_memop_phi(op, phi);

					update_memop_avail(bl, phi_op);

					mark_replace_load(op, phi);
				} else {
					/* The redundant node is NOT in the current block and anticipated. */
					memop_t *phi_op = clone_memop_phi(op, phi);

					add_memop_avail(bl, phi_op);

					/* propagate it downwards */
					res = 1;
				}
				/* clear it so we do not find it in the next iteration */
				rbitset_clear(bl->anticL_in, pos);
			}
		}
	}
	return res;
}
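
/*
 * Worked example (illustrative, not from the original sources) of the
 * partial-redundancy elimination performed by insert_Load():
 *
 *        A
 *       / \         B: x = Load(p)
 *      B   C        C: no Load of p
 *       \ /
 *        D          D: y = Load(p)   // partly redundant: avail from B only
 *
 * The Load in D is anticipated in D and available in predecessor B but
 * not in C, so a copy of the Load is created at the end of C, and the
 * Load in D becomes fully redundant: it is replaced by Phi(x, x'), where
 * x' is the result of the newly created Load in C.
 */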
/**
 * Insert Loads upwards.
 */
static void insert_Loads_upwards(void) {
	int     i, need_iter;
	block_t *bl;

	/* recalculate antic_out and insert Loads */
	DB((dbg, LEVEL_2, "Inserting Loads\n"));

	i = 0;
	do {
		DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));

		need_iter = 0;

		/* over all blocks in reverse post order, skip the start block */
		for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
			need_iter |= insert_Load(bl);
		}
		++i;
	} while (need_iter);

	DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
}
int opt_ldst(ir_graph *irg) {
	block_t  *bl;
	ir_graph *rem = current_ir_graph;

	current_ir_graph = irg;

	FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
//	firm_dbg_set_mask(dbg, -1);

	DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));

	/* we need landing pads */
	remove_critical_cf_edges(irg);

	dump_ir_block_graph(irg, "-XXX");

	if (get_opt_alias_analysis()) {
		assure_irg_entity_usage_computed(irg);
		assure_irp_globals_entity_usage_computed();
	}

	obstack_init(&env.obst);
	ir_nodemap_init(&env.adr_map);

	env.forward     = NULL;
	env.backward    = NULL;
	env.curr_adr_id = 0;
	env.n_mem_ops   = 0;
	env.changed     = 0;
	env.start_bl    = get_irg_start_block(irg);
	env.end_bl      = get_irg_end_block(irg);

	assure_irg_outs(irg);

	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);

	/* first step: allocate block entries: produces an
	   inverse post-order list for the CFG */
	set_irn_link(env.end_bl, NULL);
	irg_out_block_walk(get_irg_start_block(irg), NULL, prepare_blocks, NULL);

	if (get_block_entry(env.end_bl) == NULL) {
		/*
		 * The end block is NOT reachable due to endless loops
		 * or no_return calls. Ensure that it is initialized.
		 * Note that this places the entry for the end block first, so we must fix this.
		 * env.backward points to the last block for this purpose.
		 */
		prepare_blocks(env.end_bl, NULL);

		bl = get_block_entry(env.end_bl);
		env.forward = bl->forward_next;
		bl->forward_next = NULL;

		env.backward->forward_next = bl;
		env.backward = bl;
	}

	/* second step: find and sort all memory ops */
	walk_memory_irg(irg, collect_memops, NULL, NULL);

	if (env.n_mem_ops == 0) {
		/* no memory ops */
		goto end;
	}

	/* create the backward links */
	env.backward = NULL;
	irg_block_walk_graph(irg, NULL, collect_backward, NULL);

	/* link the end block in */
	bl = get_block_entry(env.end_bl);
	bl->backward_next = env.backward;
	env.backward = bl;

	/* check that we really start with the start / end block */
	assert(env.forward->block == env.start_bl);
	assert(env.backward->block == env.end_bl);

	/* create address sets: we know that 2 * n_mem_ops - 1 is an upper bound for all possible addresses */
	env.rbs_size = 2 * env.n_mem_ops;

	/* create the current set */
	env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
	rbitset_set(env.curr_set, env.rbs_size - 1);
	env.curr_id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
	memset(env.curr_id_2_memop, 0, env.rbs_size * sizeof(env.curr_id_2_memop[0]));

	for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
		/* set sentinel bits */
		bl->avail_out = rbitset_obstack_alloc(&env.obst, env.rbs_size);
		rbitset_set(bl->avail_out, env.rbs_size - 1);

		bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
		memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop_avail[0]));

		bl->anticL_in = rbitset_obstack_alloc(&env.obst, env.rbs_size);
		rbitset_set(bl->anticL_in, env.rbs_size - 1);

		bl->id_2_memop_antic = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
		memset(bl->id_2_memop_antic, 0, env.rbs_size * sizeof(bl->id_2_memop_antic[0]));
	}

//	dump_block_list(&env);

	calcAvail();
	calcAntic();

	insert_Loads_upwards();

	/* over all blocks in reverse post order */
	for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
		do_replacements(bl);
	}

	/* not only invalidate the out edges but free them: we might have allocated
	   new out arrays on our obstack which will be deleted anyway */
	free_irg_outs(irg);
	set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);

end:
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
	ir_nodemap_destroy(&env.adr_map);
	obstack_free(&env.obst, NULL);

	dump_ir_block_graph(irg, "-YYY");

	current_ir_graph = rem;

	return env.changed != 0;
}