2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Dataflow driven Load/Store optimizations, uses some ideas from
24 * @author Michael Beck
39 #include "irnodemap.h"
40 #include "raw_bitset.h"
45 * Maps an address to a dense ID.
47 typedef struct address_entry_t {
48 unsigned id; /**< The ID */
55 FLAG_KILL_LOADS = 1, /**< KILL all Loads */
56 FLAG_KILL_STORES = 2, /**< KILL all Stores */
57 FLAG_KILLED_NODE = 4, /**< this node was killed */
58 FLAG_EXCEPTION = 8, /**< this node has exception flow */
59 FLAG_IGNORE = 16, /**< ignore this node (volatile or other) */
60 /** this memop KILLS all addresses */
61 FLAG_KILL_ALL = FLAG_KILL_LOADS|FLAG_KILL_STORES
65 * A value: This represents a value stored at a given address in
66 * memory. Do not confuse with values from value numbering.
68 typedef struct value_t value_t;
70 ir_node *address; /**< the address of this value */
71 ir_node *value; /**< the value itself */
72 ir_mode *mode; /**< the mode of the value */
73 unsigned id; /**< address id */
77 * A memop describes a memory-related operation.
78 * These are Loads/Stores and all other ops that might modify
79 * memory (Calls, CopyB) or cause exceptions.
81 typedef struct memop_t memop_t;
83 value_t value; /**< the value of this memop: only defined for Load/Store */
84 ir_node *node; /**< the memory op itself */
85 ir_node *mem; /**< the memory FROM this node */
86 ir_node *replace; /**< the replacement node if this memop is replaced */
87 memop_t *next; /**< links to the next memory op in the block in forward order. */
88 memop_t *prev; /**< links to the previous memory op in the block in forward order. */
89 unsigned flags; /**< memop flags */
93 * Additional data for every basic block.
95 typedef struct block_t block_t;
97 memop_t *memop_forward; /**< topologically sorted list of memory ops in this block */
98 memop_t *memop_backward; /**< last memop in the list */
99 unsigned *avail_out; /**< out-set of available addresses */
100 memop_t **id_2_memop_avail; /**< maps avail address ids to memops */
101 unsigned *anticL_in; /**< in-set of anticipated Load addresses */
102 memop_t **id_2_memop; /**< maps address ids to memops */
103 ir_node *block; /**< the associated block */
104 block_t *forward_next; /**< next block entry for forward iteration */
105 block_t *backward_next; /**< next block entry for backward iteration */
106 memop_t *avail; /**< used locally for the avail map */
109 #define get_block_entry(block) ((block_t *)get_irn_link(block))
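/*
 * Note: block entries are stashed in the IR node link field (see
 * set_irn_link() in prepare_blocks() below), which is why opt_ldst()
 * reserves IR_RESOURCE_IRN_LINK before walking the graph.
 */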
112 * Metadata for this pass.
114 typedef struct ldst_env_t {
115 struct obstack obst; /**< obstack for temporary data */
116 ir_nodemap_t adr_map; /**< Maps addresses to address entries */
117 block_t *forward; /**< Inverse post-order list of all blocks Start->End */
118 block_t *backward; /**< Inverse post-order list of all blocks End->Start */
119 ir_node *start_bl; /**< start block of the current graph */
120 unsigned *curr_set; /**< current set of addresses */
121 unsigned curr_adr_id; /**< number for address mapping */
122 unsigned n_mem_ops; /**< number of memory operations (Loads/Stores) */
123 unsigned rbs_size; /**< size of all bitsets in bits */
124 int changed; /**< Flags for changed graph state */
129 static firm_dbg_module_t *dbg;
131 /* the one and only environment */
135 * Dumps the block list.
137 * @param ldst environment
139 static void dump_block_list(ldst_env *env) {
144 for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
145 DB((dbg, LEVEL_2, "%+F {", entry->block));
148 for (op = entry->memop_forward; op != NULL; op = op->next) {
150 DB((dbg, LEVEL_2, "\n\t"));
151 } DB((dbg, LEVEL_2, "%+F", op->node));
152 if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
153 DB((dbg, LEVEL_2, "X"));
154 else if (op->flags & FLAG_KILL_LOADS)
155 DB((dbg, LEVEL_2, "L"));
156 else if (op->flags & FLAG_KILL_STORES)
157 DB((dbg, LEVEL_2, "S"));
158 DB((dbg, LEVEL_2, ", "));
162 DB((dbg, LEVEL_2, "\n}\n\n"));
167 * Dumps the current set.
169 * @param bl current block
170 * @param s name of the set
172 static void dump_curr(block_t *bl, const char *s) {
174 unsigned end = env.n_mem_ops * 2 - 1;
177 DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
179 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
180 memop_t *op = bl->id_2_memop[pos];
183 DB((dbg, LEVEL_2, "\n\t"));
185 DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
188 DB((dbg, LEVEL_2, "\n}\n"));
192 #define dump_block_list()
193 #define dump_curr(bl, s)
194 #endif /* DEBUG_libfirm */
197 * Walk over the memory edges from definition to users.
199 * @param irn start node
200 * @param pre pre walker function
201 * @param post post walker function
202 * @param ctx context parameter for the walker functions
204 static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
208 mark_irn_visited(irn);
213 mode = get_irn_mode(irn);
214 if (mode == mode_M) {
215 /* every successor uses memory */
216 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
217 ir_node *succ = get_irn_out(irn, i);
219 if (! irn_visited(succ))
220 walk_memory(succ, pre, post, ctx);
222 } else if (mode == mode_T) {
223 /* only some Projs use memory */
224 for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
225 ir_node *proj = get_irn_out(irn, i);
227 if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
228 walk_memory(proj, pre, post, ctx);
236 * Walks over all memory nodes of a graph.
239 * @param pre pre walker function
240 * @param post post walker function
241 * @param ctx context parameter for the walker functions
243 static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
244 inc_irg_visited(irg);
246 ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
249 * there are two possible sources for memory: initial_mem and nomem
250 * we ignore nomem as this should NOT change the memory
252 walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
254 ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
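/*
 * Example invocation (as used by opt_ldst() below; the post walker is not
 * needed there and is passed as NULL):
 *
 *   walk_memory_irg(irg, collect_memops, NULL, NULL);
 */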
258 * Block walker: allocate a block entry for every block.
260 static void prepare_blocks(ir_node *block, void *ctx) {
261 block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
265 entry->memop_forward = NULL;
266 entry->memop_backward = NULL;
267 entry->avail_out = NULL;
268 entry->id_2_memop_avail = NULL;
269 entry->anticL_in = NULL;
270 entry->id_2_memop = NULL;
271 entry->block = block;
272 entry->forward_next = env.forward;
273 entry->backward_next = NULL;
275 set_irn_link(block, entry);
277 /* create the list in inverse order */
282 * Block walker: create backward links for the memops of a block.
284 static void collect_backward(ir_node *block, void *ctx) {
285 block_t *entry = get_block_entry(block);
289 entry->backward_next = env.backward;
291 /* create the list in inverse order */
292 env.backward = entry;
294 /* create backward links for all memory ops */
296 for (op = entry->memop_forward; op != NULL; op = op->next) {
300 entry->memop_backward = last;
306 * @param irn the IR-node representing the memop
308 static memop_t *alloc_memop(ir_node *irn) {
309 memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
311 m->value.address = NULL;
312 m->value.value = NULL;
313 m->value.mode = NULL;
324 * Register an address and allocate an ID for it.
326 * @param adr the IR-node representing the address
328 static unsigned register_address(ir_node *adr) {
329 address_entry *entry = ir_nodemap_get(&env.adr_map, adr);
333 entry = obstack_alloc(&env.obst, sizeof(*entry));
335 entry->id = env.curr_adr_id++;
336 ir_nodemap_insert(&env.adr_map, adr, entry);
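/*
 * The IDs handed out here are dense (0, 1, 2, ...), so they can be used
 * directly as indices into the address bitsets (curr_set, avail_out,
 * anticL_in) and into the id_2_memop arrays.
 */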
342 * Return the memory properties of a call node.
344 * @param call the call node
346 * @return a bitset of mtp_property_const and mtp_property_pure
348 static unsigned get_Call_memory_properties(ir_node *call) {
349 ir_type *call_tp = get_Call_type(call);
350 unsigned prop = get_method_additional_properties(call_tp);
352 /* check the call type first */
353 if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
354 /* try the called entity */
355 ir_node *ptr = get_Call_ptr(call);
357 if (is_Global(ptr)) {
358 ir_entity *ent = get_Global_entity(ptr);
360 prop = get_entity_additional_properties(ent);
363 return prop & (mtp_property_const|mtp_property_pure);
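/*
 * How the result is used (see update_Call_memop() below): a const call
 * neither reads nor writes memory and can be kicked from the memop list,
 * a pure call only reads memory and is flagged FLAG_KILL_STORES, anything
 * else is flagged FLAG_KILL_ALL.
 */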
367 * Update a memop for a Load.
371 static void update_Load_memop(memop_t *m) {
373 ir_node *load = m->node;
374 ir_node *adr = get_Load_ptr(load);
376 if (get_Load_volatility(load) == volatility_is_volatile)
377 m->flags |= FLAG_IGNORE;
379 m->value.address = adr;
381 for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
382 ir_node *proj = get_irn_out(load, i);
384 switch (get_Proj_proj(proj)) {
386 m->value.value = proj;
387 m->value.mode = get_irn_mode(proj);
389 case pn_Load_X_except:
390 m->flags |= FLAG_EXCEPTION;
398 if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
399 /* only create an address if this node is NOT killed immediately or ignored */
400 m->value.id = register_address(adr);
403 /* no user, KILL it */
404 m->flags |= FLAG_KILLED_NODE;
409 * Update a memop for a Store.
413 static void update_Store_memop(memop_t *m) {
415 ir_node *store = m->node;
416 ir_node *adr = get_Store_ptr(store);
418 if (get_Store_volatility(store) == volatility_is_volatile) {
419 m->flags |= FLAG_IGNORE;
421 /* only create an address if this node is NOT ignored */
422 m->value.id = register_address(adr);
426 m->value.address = adr;
428 for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
429 ir_node *proj = get_irn_out(store, i);
431 switch (get_Proj_proj(proj)) {
432 case pn_Store_X_except:
433 m->flags |= FLAG_EXCEPTION;
440 m->value.value = get_Store_value(store);
441 m->value.mode = get_irn_mode(m->value.value);
445 * Update a memop for a Call.
449 static void update_Call_memop(memop_t *m) {
450 ir_node *call = m->node;
451 unsigned prop = get_Call_memory_properties(call);
454 if (prop & mtp_property_const) {
455 /* A constant call does NOT use memory at all, so we
456 can kick it from the list. */
457 } else if (prop & mtp_property_pure) {
458 /* pure calls READ memory */
459 m->flags = FLAG_KILL_STORES;
461 m->flags = FLAG_KILL_ALL;
463 for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
464 ir_node *proj = get_irn_out(call, i);
466 switch (get_Proj_proj(proj)) {
467 case pn_Call_X_except:
468 m->flags |= FLAG_EXCEPTION;
470 case pn_Call_M_regular:
478 * Update a memop for a Div/Mod/Quot/DivMod.
482 static void update_DivOp_memop(memop_t *m) {
483 ir_node *div = m->node;
486 for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
487 ir_node *proj = get_irn_out(div, i);
489 switch (get_Proj_proj(proj)) {
490 case pn_Generic_X_except:
491 m->flags |= FLAG_EXCEPTION;
493 case pn_Generic_M_regular:
501 * Update a memop for a Phi.
505 static void update_Phi_memop(memop_t *m) {
506 /* the Phi is its own mem */
511 * Memory walker: collect all memory ops and build topological lists.
513 static void collect_memops(ir_node *irn, void *ctx) {
520 /* we can safely ignore ProjM's except the initial memory */
521 if (irn != get_irg_initial_mem(current_ir_graph))
525 op = alloc_memop(irn);
526 block = get_nodes_block(irn);
527 entry = get_block_entry(block);
530 update_Phi_memop(op);
531 /* Phis must always be placed first */
532 op->next = entry->memop_forward;
533 entry->memop_forward = op;
534 if (entry->memop_backward == NULL)
535 entry->memop_backward = op;
537 switch (get_irn_opcode(irn)) {
539 update_Load_memop(op);
542 update_Store_memop(op);
545 update_Call_memop(op);
557 /* we can use those to find the memory edge */
563 update_DivOp_memop(op);
567 /* TODO: handle some builtins */
569 /* unsupported operation */
570 op->flags = FLAG_KILL_ALL;
574 /* all others should be placed last */
575 if (entry->memop_backward == NULL) {
576 entry->memop_forward = entry->memop_backward = op;
578 entry->memop_backward->next = op;
579 entry->memop_backward = op;
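/*
 * After collect_memops() every block entry carries a singly linked list of
 * its memops, reachable from memop_forward: Phi memops come first, all other
 * memory operations follow in the order the memory walk visits them.  The
 * prev links that complete the doubly linked list are added afterwards by
 * collect_backward().
 */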
585 * Find an address in the current set.
587 * @param bl the block
588 * @param value the value to be searched for
590 static memop_t *find_address(const block_t *bl, const value_t *value) {
591 if (rbitset_is_set(env.curr_set, value->id)) {
592 memop_t *res = bl->id_2_memop[value->id];
594 if (res->value.mode == value->mode)
596 /* allow hidden casts */
597 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
598 get_mode_arithmetic(value->mode) == irma_twos_complement &&
599 get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
606 * Find an address in the avail_out set.
608 * @param bl the block
609 * @param value the value to be searched for
611 static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
612 if (rbitset_is_set(bl->avail_out, value->id)) {
613 memop_t *res = bl->id_2_memop_avail[value->id];
615 if (res->value.mode == value->mode)
617 /* allow hidden casts */
618 if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
619 get_mode_arithmetic(value->mode) == irma_twos_complement &&
620 get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
627 * Kill all Loads from the current set.
629 * @param bl the current block
631 static void kill_all_loads(block_t *bl) {
633 unsigned end = env.n_mem_ops * 2 - 1;
635 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
636 memop_t *op = bl->id_2_memop[pos];
638 if (! is_Store(op->node))
639 rbitset_clear(env.curr_set, pos);
644 * Kill all Stores from the current set.
646 * @param bl the current block
648 static void kill_all_stores(block_t *bl) {
650 unsigned end = env.n_mem_ops * 2 - 1;
652 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
653 memop_t *op = bl->id_2_memop[pos];
655 if (is_Store(op->node))
656 rbitset_clear(env.curr_set, pos);
661 * Kill all addresses from the current set.
663 static void kill_all(void) {
664 rbitset_clear_all(env.curr_set, env.rbs_size);
667 rbitset_set(env.curr_set, env.rbs_size - 1);
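/*
 * The highest bit (index env.rbs_size - 1) acts as a sentinel: it is always
 * set (see the "set sentinel bits" loop in opt_ldst()), so the rbitset_next()
 * scans above can terminate with pos != end without running past the end of
 * the bitset.
 */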
672 * Kill all Stores from the current set that are not alias-free with respect to a Load value.
674 * @param bl the block
675 * @param value the Load value
677 static void kill_stores(block_t *bl, const value_t *value) {
679 unsigned end = env.n_mem_ops * 2 - 1;
681 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
682 memop_t *op = bl->id_2_memop[pos];
684 if (is_Store(op->node)) {
685 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
686 op->value.address, op->value.mode)) {
687 rbitset_clear(env.curr_set, pos);
688 bl->id_2_memop[pos] = NULL;
695 * Kill all memops from the current set that are not alias-free with respect to a Store value.
697 * @param bl the block
698 * @param value the Store value
700 static void kill_memops(block_t *bl, const value_t *value) {
702 unsigned end = env.n_mem_ops * 2 - 1;
704 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
705 memop_t *op = bl->id_2_memop[pos];
707 if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
708 op->value.address, op->value.mode)) {
709 rbitset_clear(env.curr_set, pos);
710 bl->id_2_memop[pos] = NULL;
716 * Add the value of a memop to the current set.
718 * @param bl the block
719 * @param op the memory op
721 static void add_memop(block_t *bl, memop_t *op) {
722 rbitset_set(env.curr_set, op->value.id);
723 bl->id_2_memop[op->value.id] = op;
727 * Add the value of a memop to the avail_out set.
729 * @param bl the block
730 * @param op the memory op
732 static void add_memop_avail(block_t *bl, memop_t *op) {
733 rbitset_set(bl->avail_out, op->value.id);
734 bl->id_2_memop_avail[op->value.id] = op;
738 * build a phi definition for a value in the given block
740 * @param block the block
741 * @param value the value
743 * @return the definition
745 static ir_node *build_phi_definition(ir_node *block, const value_t *value) {
750 /* no: we need a new Phi */
751 n = get_Block_n_cfgpreds(block);
754 block = get_Block_cfgpred_block(block, 0);
755 n = get_Block_n_cfgpreds(block);
756 mop = find_address(get_block_entry(block), value);
762 NEW_ARR_A(ir_node *, in, n);
763 for (i = n - 1; i >= 0; --i) {
764 ir_node *pred_bl = get_Block_cfgpred_block(block, i);
767 mop = find_address(get_block_entry(pred_bl), value);
768 def_bl = get_nodes_block(mop->value.value);
770 if (block_dominates(def_bl, pred_bl))
771 in[i] = mop->value.value;
773 in[i] = build_phi_definition(pred_bl, &mop->value);
776 phi = new_r_Phi(current_ir_graph, block, n, in, value->mode);
777 DB((dbg, LEVEL_2, "Created new Phi %+F for value %+F in block %+F\n", phi, value->value, block));
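/*
 * build_phi_definition() works recursively: for every predecessor the value
 * is looked up via find_address(); if the definition found there does not
 * dominate that predecessor, another Phi is built in the predecessor first
 * and used as the corresponding Phi input.
 */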
782 * find a definition for a value in the given block or above.
784 * @param block the block
785 * @param op the memop: the definition must dominate it
786 * @param value the value
788 * @return the definition
790 static ir_node *find_definition(ir_node *block, const memop_t *op, const value_t *value) {
791 ir_node *def_block = get_nodes_block(value->value);
793 if (def_block == block) {
794 /* the value is in our block: check if it is defined above us in the memory list */
797 for (p = op->prev; p != NULL; p = p->prev) {
798 if (!(p->flags & FLAG_KILLED_NODE) &&
799 p->value.address == value->address) {
801 assert(p->value.value == value->value);
802 return p->value.value;
805 } else if (block_dominates(def_block, block)) {
806 /* strictly dominates */
810 return build_phi_definition(block, value);
814 * Mark a Load memop to be replaced by a definition.
816 * @param op the Load memop
818 static void mark_replace_load(memop_t *op, ir_node *def) {
820 op->flags |= FLAG_KILLED_NODE;
825 * Mark a Store memop to be removed.
827 * @param op the Store memop
829 static void mark_remove_store(memop_t *op) {
830 op->flags |= FLAG_KILLED_NODE;
834 #define BYTE_SIZE(x) (((x) + 7) >> 3)
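/*
 * Sketch of the forward dataflow problem solved by forward_avail() and
 * calcAvail():
 *
 *   Avail_in(B)  = intersection of Avail_out(P) over all predecessors P
 *   Avail_out(B) = result of applying B's memops to Avail_in(B)
 *                  (Loads/Stores add their address, aliasing Stores and
 *                   calls kill entries)
 *
 * The fixpoint is reached when no avail_out set changes anymore.
 */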
837 * Do forward dataflow analysis on a given block to calculate the avail_out set.
839 * @param bl the block
841 * @return non-zero if the set has changed since last iteration
843 static int forward_avail(block_t *bl) {
847 ir_node *pred = get_Block_cfgpred_block(bl->block, 0);
848 block_t *pred_bl = get_block_entry(pred);
850 rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
852 n = get_Block_n_cfgpreds(bl->block);
856 /* more than one predecessor, calculate the join */
857 for (i = n - 1; i > 0; --i) {
858 ir_node *pred = get_Block_cfgpred_block(bl->block, i);
859 block_t *pred_bl = get_block_entry(pred);
861 rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
864 /* make sure that all values are in the map */
865 for (pos = env.rbs_size - 1; pos >= 0; --pos) {
866 if (! rbitset_is_set(env.curr_set, pos))
867 bl->id_2_memop[pos] = NULL;
869 for (i = n - 1; i >= 0; --i) {
870 ir_node *pred = get_Block_cfgpred_block(bl->block, i);
871 block_t *pred_bl = get_block_entry(pred);
873 if (pred_bl->id_2_memop[pos] != NULL) {
874 bl->id_2_memop[pos] = pred_bl->id_2_memop[pos];
881 /* only one predecessor, simply copy the map */
882 memcpy(bl->id_2_memop, pred_bl->id_2_memop, env.rbs_size * sizeof(bl->id_2_memop[0]));
885 dump_curr(bl, "Avail_in");
887 for (op = bl->memop_forward; op != NULL; op = op->next) {
888 switch (get_irn_opcode(op->node)) {
896 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
897 /* do we have this already? */
898 memop_t *other = find_address(bl, &op->value);
900 def = find_definition(bl->block, op, &other->value);
901 if (is_Store(other->node)) {
903 DB((dbg, LEVEL_1, "RAW %+F <- %+F(%+F)\n", op->node, def, other->node));
906 DB((dbg, LEVEL_1, "RAR %+F <- %+F(%+F)\n", op->node, def, other->node));
908 mark_replace_load(op, def);
911 kill_stores(bl, &op->value);
917 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
918 /* do we have this Store already? */
919 memop_t *other = find_address(bl, &op->value);
921 if (is_Store(other->node) &&
922 get_nodes_block(other->node) == get_nodes_block(op->node)) {
924 * For a WAW in the same block we can kick the first Store.
925 * This is a shortcut: we know that the second Store will be anticipated
928 DB((dbg, LEVEL_1, "WAW %+F <- %+F\n", op->node, other->node));
929 mark_remove_store(other);
930 /* FIXME: a Load might get freed due to this killed Store */
931 } else if (other->value.value == op->value.value) {
933 DB((dbg, LEVEL_1, "WAR %+F <- %+F\n", op->node, other->node));
934 mark_remove_store(op);
936 /* we overwrite the value that was loaded */
941 kill_memops(bl, &op->value);
947 switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
948 case FLAG_KILL_LOADS|FLAG_KILL_STORES:
951 case FLAG_KILL_LOADS:
954 case FLAG_KILL_STORES:
962 if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
964 rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
965 dump_curr(bl, "Avail_out*");
968 dump_curr(bl, "Avail_out");
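/*
 * The RAW/RAR/WAW/WAR cases above correspond to source patterns like the
 * following (illustrative sketch only, not taken from this file):
 *
 *   RAW:  *p = a;  x = *p;     the Load is replaced by a
 *   RAR:  x = *p;  y = *p;     the second Load is replaced by x
 *   WAW:  *p = a;  *p = b;     the first Store is removed (same block)
 *   WAR:  x = *p;  *p = x;     the Store is removed, *p already holds x
 */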
973 * Do backward dataflow analysis on a given block to calculate the antic set
974 * of Load addresses.
976 * @param bl the block
978 * @return non-zero if the set has changed since last iteration
980 static int backward_antic(block_t *bl) {
982 ir_node *succ = get_Block_cfg_out(bl->block, 0);
983 block_t *succ_bl = get_block_entry(succ);
986 rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
987 memcpy(bl->id_2_memop, succ_bl->id_2_memop, env.rbs_size * sizeof(bl->id_2_memop[0]));
989 n = get_Block_n_cfg_outs(bl->block);
993 for (i = n - 1; i > 0; --i) {
994 ir_node *succ = get_Block_cfg_out(bl->block, i);
995 block_t *succ_bl = get_block_entry(succ);
997 rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
1002 /* cleanup: kill those Loads whose address is not available */
1003 for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
1004 memop_t *op = succ_bl->id_2_memop[pos];
1005 ir_node *ptr = get_Load_ptr(op->node);
1006 ir_node *ptr_bl = get_nodes_block(ptr);
1008 if (!block_dominates(ptr_bl, bl->block))
1009 rbitset_clear(env.curr_set, pos);
1013 dump_curr(bl, "AnticL_out");
1015 for (op = bl->memop_backward; op != NULL; op = op->prev) {
1016 switch (get_irn_opcode(op->node)) {
1024 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1030 if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
1031 /* a Store: check which memops must be killed */
1032 kill_memops(bl, &op->value);
1036 switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
1037 case FLAG_KILL_LOADS|FLAG_KILL_STORES:
1040 case FLAG_KILL_LOADS:
1043 case FLAG_KILL_STORES:
1044 kill_all_stores(bl);
1051 dump_curr(bl, "AnticL_in");
1052 if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
1054 rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
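/*
 * Sketch of the backward dataflow problem solved by backward_antic() and
 * calcAntic():
 *
 *   AnticL_out(B) = intersection of AnticL_in(S) over all successors S,
 *                   restricted to Loads whose address is computed in a
 *                   block dominating B
 *   AnticL_in(B)  = result of walking B's memops backwards over
 *                   AnticL_out(B): Loads add their address, aliasing
 *                   Stores and calls kill entries
 */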
1061 * Replace a Load memop by an already known value.
1063 * @param op the Load memop
1065 static void replace_load(memop_t *op) {
1066 ir_node *load = op->node;
1067 ir_node *def = skip_Id(op->replace);
1072 DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
1074 if (op->flags & FLAG_EXCEPTION) {
1075 /* bad: this node is unused and executed for exception only */
1076 DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
1079 DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
1081 for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
1082 ir_node *proj = get_irn_out(load, i);
1084 switch (get_Proj_proj(proj)) {
1086 exchange(proj, get_Load_mem(load));
1089 mode = get_irn_mode(proj);
1090 if (get_irn_mode(def) != mode) {
1092 dbg_info *db = get_irn_dbg_info(load);
1093 ir_node *block = get_nodes_block(proj);
1094 def = new_rd_Conv(db, current_ir_graph, block, def, mode);
1096 exchange(proj, def);
1098 case pn_Load_X_except:
1099 exchange(proj, new_Bad());
1101 case pn_Load_X_regular:
1102 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
1105 panic("Unknown Proj from Load");
1111 * Remove a Store memop.
1113 * @param op the Store memop
1115 static void remove_store(memop_t *op) {
1116 ir_node *store = op->node;
1119 DB((dbg, LEVEL_1, "Removing %+F\n", store));
1120 for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
1121 ir_node *proj = get_irn_out(store, i);
1123 switch (get_Proj_proj(proj)) {
1125 exchange(proj, get_Store_mem(store));
1127 case pn_Store_X_except:
1128 exchange(proj, new_Bad());
1130 case pn_Store_X_regular:
1131 exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
1134 panic("Unknown Proj from Store");
1141 * Do all necessary replacements for a given block.
1143 * @param bl the block
1145 static void do_replacements(block_t *bl) {
1148 for (op = bl->memop_forward; op != NULL; op = op->next) {
1149 if (op->flags & FLAG_KILLED_NODE) {
1150 switch (get_irn_opcode(op->node)) {
1163 * Calculate the Avail_out sets for all basic blocks.
1165 static void calcAvail(void) {
1169 /* calculate avail_out */
1170 DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
1173 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1177 /* over all blocks in reverse post order, skip the start block */
1178 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1179 need_iter |= forward_avail(bl);
1182 } while (need_iter);
1184 /* copy the content of the id_2_memop map into the id_2_memop_avail map
1185 as it must be preserved for later use */
1186 for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
1187 memop_t **t = bl->id_2_memop_avail;
1189 bl->id_2_memop_avail = bl->id_2_memop;
1192 DB((dbg, LEVEL_2, "Get avail set after %d iterations\n\n", i));
1196 * Calculate the Antic_in sets for all basic blocks.
1198 static void calcAntic(void) {
1201 /* calculate antic_in */
1202 DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
1207 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1211 /* over all blocks in reverse post order */
1212 for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
1213 need_iter |= backward_antic(bl);
1216 } while (need_iter);
1217 DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
1221 * Return the node representing the first memory in a block.
1223 * @param bl the block
1225 static ir_node *find_first_memory(block_t *bl) {
1227 if (bl->memop_forward != NULL) {
1228 return bl->memop_forward->node;
1230 /* if there is NO memory in this block, go to the post dominator */
1231 bl = get_block_entry(get_Block_ipostdom(bl->block));
1236 * Return the node representing the last memory in a block.
1238 * @param bl the block
1240 static ir_node *find_last_memory(block_t *bl) {
1242 if (bl->memop_backward != NULL) {
1243 return bl->memop_backward->mem;
1245 /* if there is NO memory in this block, go to the dominator */
1246 bl = get_block_entry(get_Block_idom(bl->block));
1251 * Reroute all memory users of old memory
1252 * to a new memory IR-node.
1254 * @param omem the old memory IR-node
1255 * @param nmem the new memory IR-node
1257 static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
1260 for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
1262 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1264 set_irn_n(user, n_pos, nmem);
1267 /* all edges that previously pointed to omem now point to nmem */
1268 nmem->out = omem->out;
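/*
 * Taking over omem's complete out array is fine here because every user was
 * just redirected to nmem; the out information is invalidated (and freed)
 * at the end of opt_ldst() anyway.
 */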
1272 * Reroute memory users of old memory that are dominated by a given block
1273 * to a new memory IR-node.
1275 * @param omem the old memory IR-node
1276 * @param nmem the new memory IR-node
1277 * @param pass_bl the block the memory must pass
1279 static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
1280 int i, j, n = get_irn_n_outs(omem);
1281 ir_def_use_edge *edges = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
1283 for (i = j = 0; i < n; ++i) {
1285 ir_node *user = get_irn_out_ex(omem, i, &n_pos);
1286 ir_node *use_bl = get_nodes_block(user);
1290 use_bl = get_Block_cfgpred_block(use_bl, n_pos);
1292 if (block_dominates(pass_bl, use_bl)) {
1293 /* found a user that is dominated */
1295 edges[j].pos = n_pos;
1296 edges[j].use = user;
1298 set_irn_n(user, n_pos, nmem);
1302 /* Modify the out structure: we create a new out edge array on our
1303 temporary obstack here. This should be no problem, as we invalidate the edges
1304 at the end anyway. */
1305 /* first entry is used for the length */
1314 static void insert_Load(ir_node *block, void *ctx) {
1315 int *new_stuff = ctx;
1317 int i, n = get_Block_n_cfgpreds(block);
1319 unsigned end = env.n_mem_ops * 2 - 1;
1324 bl = get_block_entry(block);
1326 for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
1327 memop_t *op = bl->id_2_memop[pos];
1330 assert(is_Load(op->node));
1332 if (op->flags & FLAG_KILLED_NODE)
1336 for (i = n - 1; i >= 0; --i) {
1337 ir_node *pred = get_Block_cfgpred_block(block, i);
1338 block_t *pred_bl = get_block_entry(pred);
1339 memop_t *e = find_address_avail(pred_bl, &op->value);
1342 ir_node *block = get_nodes_block(op->value.address);
1343 if (! block_dominates(block, pred)) {
1344 /* cannot place a copy here */
1348 pred_bl->avail = NULL;
1355 ir_mode *mode = op->value.mode;
1358 NEW_ARR_A(ir_node *, in, n);
1360 for (i = n - 1; i >= 0; --i) {
1361 ir_node *pred = get_Block_cfgpred_block(block, i);
1362 block_t *pred_bl = get_block_entry(pred);
1364 if (pred_bl->avail == NULL) {
1365 /* create a new Load here to make it fully redundant */
1366 dbg_info *db = get_irn_dbg_info(op->node);
1367 ir_node *last_mem = find_last_memory(pred_bl);
1368 ir_node *load, *def;
1371 assert(last_mem != NULL);
1372 load = new_rd_Load(db, current_ir_graph, pred, last_mem, op->value.address, mode, cons_none);
1373 def = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
1374 DB((dbg, LEVEL_1, "Created new %+F for partly redundant %+F\n", load, op->node));
1376 new_op = alloc_memop(load);
1377 new_op->mem = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
1378 new_op->value.address = op->value.address;
1379 new_op->value.id = op->value.id;
1380 new_op->value.mode = mode;
1381 new_op->value.value = def;
1383 new_op->prev = pred_bl->memop_backward;
1384 pred_bl->memop_backward = new_op;
1386 if (get_nodes_block(last_mem) == pred) {
1387 /* We have added a new last memory op in the pred block.
1388 If pred already had a last mem, reroute all memory
1390 reroute_all_mem_users(last_mem, new_op->mem);
1392 /* reroute only the memory going through the pred block */
1393 reroute_mem_through(last_mem, new_op->mem, pred);
1396 /* we added this load at the end, so it will be avail anyway */
1397 add_memop_avail(pred_bl, new_op);
1398 pred_bl->avail = new_op;
1400 in[i] = pred_bl->avail->value.value;
1402 phi = new_r_Phi(current_ir_graph, block, n, in, mode);
1403 mark_replace_load(op, phi);
1406 if (get_nodes_block(op->node) == block) {
1407 /* The redundant node was in the current block:
1408 In that case, DO NOT update avail_out. If it was NOT
1409 avail although it is executed in this block, it is killed by a later
1413 /* The redundant node is NOT in the current block and anticipated.
1414 This can only happen IFF nothing kills the Load in the current block,
1415 so it will be avail in the next iteration.
1417 add_memop_avail(bl, op);
1419 /* TODO propagate it downwards */
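/*
 * Illustrative partial redundancy handled by insert_Load() (sketch, not
 * taken from this file):
 *
 *   if (c)
 *       x = *p;    // Load available on this path only
 *   ...
 *   y = *p;        // anticipated here, but available on one predecessor
 *                  // only: a copy of the Load is created in the other
 *                  // predecessor and y is replaced by a Phi
 */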
1426 * Insert Loads upwards.
1428 static void insert_Loads_upwards(void) {
1431 /* insert partially redundant Loads */
1432 DB((dbg, LEVEL_2, "Inserting Loads"));
1435 DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
1438 dom_tree_walk_irg(current_ir_graph, insert_Load, NULL, &need_iter);
1440 } while (need_iter);
1441 DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
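/*
 * Usage sketch: opt_ldst() establishes its own prerequisites (critical edge
 * removal, out edges) and returns non-zero iff it changed the graph:
 *
 *   if (opt_ldst(irg)) {
 *       ... the graph was modified ...
 *   }
 */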
1444 int opt_ldst(ir_graph *irg) {
1446 ir_graph *rem = current_ir_graph;
1448 current_ir_graph = irg;
1450 FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
1452 DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
1454 /* we need landing pads */
1455 remove_critical_cf_edges(irg);
1457 dump_ir_block_graph(irg, "-XXX");
1459 if (get_opt_alias_analysis()) {
1460 assure_irg_entity_usage_computed(irg);
1461 assure_irp_globals_entity_usage_computed();
1464 obstack_init(&env.obst);
1465 ir_nodemap_init(&env.adr_map);
1468 env.backward = NULL;
1469 env.curr_adr_id = 0;
1472 env.start_bl = get_irg_start_block(irg);
1475 assure_irg_outs(irg);
1477 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
1479 /* first step: allocate block entries: produces an
1480 inverse post-order list for the CFG */
1481 irg_out_block_walk(get_irg_start_block(irg), NULL, prepare_blocks, NULL);
1483 /* second step: find and sort all memory ops */
1484 walk_memory_irg(irg, collect_memops, NULL, NULL);
1486 if (env.n_mem_ops == 0) {
1491 /* create the backward links */
1492 irg_block_walk_graph(irg, NULL, collect_backward, NULL);
1494 /* check that we really start with the start / end block */
1495 assert(env.forward->block == env.start_bl);
1496 assert(env.backward->block == get_irg_end_block(irg));
1498 /* create address sets: we know that 2 * n_mem_ops - 1 is an upper bound for all possible addresses */
1499 env.rbs_size = 2 * env.n_mem_ops;
1501 /* create the current set */
1502 env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
1503 rbitset_set(env.curr_set, env.rbs_size - 1);
1505 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
1506 /* set sentinel bits */
1507 bl->avail_out = rbitset_obstack_alloc(&env.obst, env.rbs_size);
1508 rbitset_set(bl->avail_out, env.rbs_size - 1);
1510 bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
1511 memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop[0]));
1513 bl->anticL_in = rbitset_obstack_alloc(&env.obst, env.rbs_size);
1514 rbitset_set(bl->anticL_in, env.rbs_size - 1);
1516 bl->id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
1517 memset(bl->id_2_memop, 0, env.rbs_size * sizeof(bl->id_2_memop[0]));
1520 // dump_block_list(&env);
1525 insert_Loads_upwards();
1528 /* over all blocks in reverse post order */
1529 for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
1530 do_replacements(bl);
1533 /* not only invalidate but free them. We might have allocated new out arrays
1534 on our obstack which will be deleted anyway. */
1536 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
1540 ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
1541 ir_nodemap_destroy(&env.adr_map);
1542 obstack_free(&env.obst, NULL);
1544 current_ir_graph = rem;
1546 return env.changed != 0;