 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
 * This file is part of libFirm.
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 * @brief   Dataflow driven Load/Store optimizations, uses some ideas from
 * @author  Michael Beck
#include "irnodemap.h"
#include "raw_bitset.h"
 * Mapping an address to a dense ID.
typedef struct address_entry_t {
	unsigned id;              /**< The ID */
	FLAG_KILL_LOADS  = 1,     /**< KILL all Loads */
	FLAG_KILL_STORES = 2,     /**< KILL all Stores */
	FLAG_KILLED_NODE = 4,     /**< this node was killed */
	FLAG_EXCEPTION   = 8,     /**< this node has exception flow */
	FLAG_IGNORE      = 16,    /**< ignore this node (volatile or other) */
	/** this memop KILLS all addresses */
	FLAG_KILL_ALL    = FLAG_KILL_LOADS|FLAG_KILL_STORES
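	/* Note: update_Call_memop() below sets FLAG_KILL_STORES for pure calls
	   and FLAG_KILL_ALL for all other non-const calls. */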
 * A value: This represents a value stored at a given address in
 * memory. Do not confuse with values from value numbering.
typedef struct value_t value_t;
	ir_node  *address;    /**< the address of this value */
	ir_node  *value;      /**< the value itself */
	ir_mode  *mode;       /**< the mode of the value */
	unsigned id;          /**< address id */
 * A memop describes a memory-related operation.
 * These are Loads/Stores and all other ops that might modify
 * memory (Calls, CopyB) or cause exceptions.
typedef struct memop_t memop_t;
	value_t  value;      /**< the value of this memop: only defined for Load/Store */
	ir_node  *node;      /**< the memory op itself */
	ir_node  *mem;       /**< the memory FROM this node */
	ir_node  *replace;   /**< the replacement node if this memop is replaced */
	memop_t  *next;      /**< links to the next memory op in the block in forward order */
	memop_t  *prev;      /**< links to the previous memory op in the block */
	unsigned flags;      /**< memop flags */
 * Additional data for every basic block.
typedef struct block_t block_t;
	memop_t  *memop_forward;      /**< topologically sorted list of memory ops in this block */
	memop_t  *memop_backward;     /**< last memop in the list */
	unsigned *avail_out;          /**< out-set of available addresses */
	memop_t  **id_2_memop_avail;  /**< maps avail address ids to memops */
	unsigned *anticL_in;          /**< in-set of anticipated Load addresses */
	memop_t  **id_2_memop;        /**< maps address ids to memops */
	ir_node  *block;              /**< the associated block */
	block_t  *forward_next;       /**< next block entry for forward iteration */
	block_t  *backward_next;      /**< next block entry for backward iteration */
	memop_t  *avail;              /**< used locally for the avail map */
#define get_block_entry(block) ((block_t *)get_irn_link(block))
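/* Block entries are stored on the block's link field; prepare_blocks() sets
   them up before get_block_entry() is used anywhere. */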
 * Metadata for this pass.
typedef struct ldst_env_t {
	struct obstack obst;          /**< obstack for temporary data */
	ir_nodemap_t   adr_map;       /**< Map addresses to address entries */
	block_t        *forward;      /**< Inverse post-order list of all blocks Start->End */
	block_t        *backward;     /**< Inverse post-order list of all blocks End->Start */
	ir_node        *start_bl;     /**< start block of the current graph */
	unsigned       *curr_set;     /**< current set of addresses */
	unsigned       curr_adr_id;   /**< number for address mapping */
	unsigned       n_mem_ops;     /**< number of memory operations (Loads/Stores) */
	unsigned       rbs_size;      /**< size of all bitsets in bits */
	int            changed;       /**< Flags for changed graph state */
static firm_dbg_module_t *dbg;
/* the one and only environment */
 * Dumps the block list.
 * @param env  the loadstore environment
static void dump_block_list(ldst_env *env) {
	for (entry = env->forward; entry != NULL; entry = entry->forward_next) {
		DB((dbg, LEVEL_2, "%+F {", entry->block));
		for (op = entry->memop_forward; op != NULL; op = op->next) {
				DB((dbg, LEVEL_2, "\n\t"));
			}
			DB((dbg, LEVEL_2, "%+F", op->node));
			if ((op->flags & FLAG_KILL_ALL) == FLAG_KILL_ALL)
				DB((dbg, LEVEL_2, "X"));
			else if (op->flags & FLAG_KILL_LOADS)
				DB((dbg, LEVEL_2, "L"));
			else if (op->flags & FLAG_KILL_STORES)
				DB((dbg, LEVEL_2, "S"));
			DB((dbg, LEVEL_2, ", "));
		DB((dbg, LEVEL_2, "\n}\n\n"));
 * Dumps the current set.
 * @param bl  current block
 * @param s   name of the set
static void dump_curr(block_t *bl, const char *s) {
	unsigned end = env.n_mem_ops * 2 - 1;
	DB((dbg, LEVEL_2, "%s[%+F] = {", s, bl->block));
	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = bl->id_2_memop[pos];
			DB((dbg, LEVEL_2, "\n\t"));
		DB((dbg, LEVEL_2, "<%+F, %+F>, ", op->value.address, op->value.value));
	DB((dbg, LEVEL_2, "\n}\n"));
#define dump_block_list()
#define dump_curr(bl, s)
#endif /* DEBUG_libfirm */
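/* Without DEBUG_libfirm the dump helpers above expand to nothing. */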
 * Walk over the memory edges from definition to users.
 * @param irn   start node
 * @param pre   pre walker function
 * @param post  post walker function
 * @param ctx   context parameter for the walker functions
static void walk_memory(ir_node *irn, irg_walk_func *pre, irg_walk_func *post, void *ctx) {
	mark_irn_visited(irn);
	mode = get_irn_mode(irn);
	if (mode == mode_M) {
		/* every successor uses memory */
		for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
			ir_node *succ = get_irn_out(irn, i);
			if (! irn_visited(succ))
				walk_memory(succ, pre, post, ctx);
	} else if (mode == mode_T) {
		/* only some Projs use memory */
		for (i = get_irn_n_outs(irn) - 1; i >= 0; --i) {
			ir_node *proj = get_irn_out(irn, i);
			if (get_irn_mode(proj) == mode_M && ! irn_visited(proj))
				walk_memory(proj, pre, post, ctx);
 * Walks over all memory nodes of a graph.
 * @param pre   pre walker function
 * @param post  post walker function
 * @param ctx   context parameter for the walker functions
static void walk_memory_irg(ir_graph *irg, irg_walk_func pre, irg_walk_func post, void *ctx) {
	inc_irg_visited(irg);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
	 * there are two possible sources for memory: initial_mem and nomem
	 * we ignore nomem as this should NOT change the memory
	walk_memory(get_irg_initial_mem(irg), pre, post, ctx);
	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
 * Block walker: allocate a block entry for every block.
static void prepare_blocks(ir_node *block, void *ctx) {
	block_t *entry = obstack_alloc(&env.obst, sizeof(*entry));
	entry->memop_forward    = NULL;
	entry->memop_backward   = NULL;
	entry->avail_out        = NULL;
	entry->id_2_memop_avail = NULL;
	entry->anticL_in        = NULL;
	entry->id_2_memop       = NULL;
	entry->block            = block;
	entry->forward_next     = env.forward;
	entry->backward_next    = NULL;
	set_irn_link(block, entry);
	/* create the list in inverse order */
 * Block walker: create backward links for the memops of a block.
static void collect_backward(ir_node *block, void *ctx) {
	block_t *entry = get_block_entry(block);
	entry->backward_next = env.backward;
	/* create the list in inverse order */
	env.backward = entry;
	/* create backward links for all memory ops */
	for (op = entry->memop_forward; op != NULL; op = op->next) {
	entry->memop_backward = last;
 * @param irn  the IR-node representing the memop
static memop_t *alloc_memop(ir_node *irn) {
	memop_t *m = obstack_alloc(&env.obst, sizeof(*m));
	m->value.address = NULL;
	m->value.value   = NULL;
	m->value.mode    = NULL;
 * Register an address and allocate an ID for it.
 * @param adr  the IR-node representing the address
static unsigned register_address(ir_node *adr) {
	address_entry *entry = ir_nodemap_get(&env.adr_map, adr);
		entry = obstack_alloc(&env.obst, sizeof(*entry));
		entry->id = env.curr_adr_id++;
		ir_nodemap_insert(&env.adr_map, adr, entry);
 * Return the memory properties of a call node.
 * @param call  the call node
 * @return a bitset of mtp_property_const and mtp_property_pure
static unsigned get_Call_memory_properties(ir_node *call) {
	ir_type  *call_tp = get_Call_type(call);
	unsigned prop     = get_method_additional_properties(call_tp);
	/* first check the call type */
	if ((prop & (mtp_property_const|mtp_property_pure)) == 0) {
		/* try the called entity */
		ir_node *ptr = get_Call_ptr(call);
		if (is_Global(ptr)) {
			ir_entity *ent = get_Global_entity(ptr);
			prop = get_entity_additional_properties(ent);
	return prop & (mtp_property_const|mtp_property_pure);
 * Update a memop for a Load.
static void update_Load_memop(memop_t *m) {
	ir_node *load = m->node;
	ir_node *adr  = get_Load_ptr(load);
	if (get_Load_volatility(load) == volatility_is_volatile)
		m->flags |= FLAG_IGNORE;
	m->value.address = adr;
	for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(load, i);
		switch (get_Proj_proj(proj)) {
			m->value.value = proj;
			m->value.mode  = get_irn_mode(proj);
		case pn_Load_X_except:
			m->flags |= FLAG_EXCEPTION;
	if (m->value.value != NULL && !(m->flags & FLAG_IGNORE)) {
		/* only create an address if this node is NOT killed immediately or ignored */
		m->value.id = register_address(adr);
		/* no user, KILL it */
		m->flags |= FLAG_KILLED_NODE;
 * Update a memop for a Store.
static void update_Store_memop(memop_t *m) {
	ir_node *store = m->node;
	ir_node *adr   = get_Store_ptr(store);
	if (get_Store_volatility(store) == volatility_is_volatile) {
		m->flags |= FLAG_IGNORE;
		/* only create an address if this node is NOT ignored */
		m->value.id = register_address(adr);
	m->value.address = adr;
	for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(store, i);
		switch (get_Proj_proj(proj)) {
		case pn_Store_X_except:
			m->flags |= FLAG_EXCEPTION;
	m->value.value = get_Store_value(store);
	m->value.mode  = get_irn_mode(m->value.value);
 * Update a memop for a Call.
static void update_Call_memop(memop_t *m) {
	ir_node  *call = m->node;
	unsigned prop  = get_Call_memory_properties(call);
	if (prop & mtp_property_const) {
		/* A constant call does NOT use memory at all, we
		   can kick it from the list. */
	} else if (prop & mtp_property_pure) {
		/* pure calls READ memory */
		m->flags = FLAG_KILL_STORES;
		m->flags = FLAG_KILL_ALL;
	for (i = get_irn_n_outs(call) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(call, i);
		switch (get_Proj_proj(proj)) {
		case pn_Call_X_except:
			m->flags |= FLAG_EXCEPTION;
		case pn_Call_M_regular:
 * Update a memop for a Div/Mod/Quot/DivMod.
static void update_DivOp_memop(memop_t *m) {
	ir_node *div = m->node;
	for (i = get_irn_n_outs(div) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(div, i);
		switch (get_Proj_proj(proj)) {
		case pn_Generic_X_except:
			m->flags |= FLAG_EXCEPTION;
		case pn_Generic_M_regular:
 * Update a memop for a Phi.
static void update_Phi_memop(memop_t *m) {
	/* the Phi is its own mem */
 * Memory walker: collect all memory ops and build topological lists.
static void collect_memops(ir_node *irn, void *ctx) {
		/* we can safely ignore ProjMs except the initial memory */
		if (irn != get_irg_initial_mem(current_ir_graph))
	op    = alloc_memop(irn);
	block = get_nodes_block(irn);
	entry = get_block_entry(block);
		update_Phi_memop(op);
		/* Phis must always be placed first */
		op->next = entry->memop_forward;
		entry->memop_forward = op;
		if (entry->memop_backward == NULL)
			entry->memop_backward = op;
		switch (get_irn_opcode(irn)) {
			update_Load_memop(op);
			update_Store_memop(op);
			update_Call_memop(op);
			/* we can use those to find the memory edge */
			update_DivOp_memop(op);
			/* TODO: handle some builtins */
			/* unsupported operation */
			op->flags = FLAG_KILL_ALL;
		/* all others should be placed last */
		if (entry->memop_backward == NULL) {
			entry->memop_forward = entry->memop_backward = op;
			entry->memop_backward->next = op;
			entry->memop_backward = op;
 * Find an address in the current set.
 * @param bl     the block
 * @param value  the value to be searched for
static memop_t *find_address(const block_t *bl, const value_t *value) {
	if (rbitset_is_set(env.curr_set, value->id)) {
		memop_t *res = bl->id_2_memop[value->id];
		if (res->value.mode == value->mode)
		/* allow hidden casts */
		if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
		    get_mode_arithmetic(value->mode) == irma_twos_complement &&
		    get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
 * Find an address in the avail_out set.
 * @param bl     the block
 * @param value  the value to be searched for
static memop_t *find_address_avail(const block_t *bl, const value_t *value) {
	if (rbitset_is_set(bl->avail_out, value->id)) {
		memop_t *res = bl->id_2_memop_avail[value->id];
		if (res->value.mode == value->mode)
		/* allow hidden casts */
		if (get_mode_arithmetic(res->value.mode) == irma_twos_complement &&
		    get_mode_arithmetic(value->mode) == irma_twos_complement &&
		    get_mode_size_bits(res->value.mode) == get_mode_size_bits(value->mode))
 * Kill all Loads from the current set.
 * @param bl  the current block
static void kill_all_loads(block_t *bl) {
	unsigned end = env.n_mem_ops * 2 - 1;
	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = bl->id_2_memop[pos];
		if (! is_Store(op->node))
			rbitset_clear(env.curr_set, pos);
 * Kill all Stores from the current set.
 * @param bl  the current block
static void kill_all_stores(block_t *bl) {
	unsigned end = env.n_mem_ops * 2 - 1;
	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = bl->id_2_memop[pos];
		if (is_Store(op->node))
			rbitset_clear(env.curr_set, pos);
 * Kill all addresses from the current set.
static void kill_all(void) {
	rbitset_clear_all(env.curr_set, env.rbs_size);
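	/* re-set the sentinel bit (rbs_size - 1): it terminates the
	   rbitset_next() loops used throughout this pass */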
	rbitset_set(env.curr_set, env.rbs_size - 1);
 * Kill those Stores from the current set that are not alias free with respect to a Load value.
 * @param bl     the block
 * @param value  the Load value
static void kill_stores(block_t *bl, const value_t *value) {
	unsigned end = env.n_mem_ops * 2 - 1;
	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = bl->id_2_memop[pos];
		if (is_Store(op->node)) {
			if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
			                                      op->value.address, op->value.mode)) {
				rbitset_clear(env.curr_set, pos);
				bl->id_2_memop[pos] = NULL;
 * Kill those memops from the current set that are not alias free with respect to a Store value.
 * @param bl     the block
 * @param value  the Store value
static void kill_memops(block_t *bl, const value_t *value) {
	unsigned end = env.n_mem_ops * 2 - 1;
	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op = bl->id_2_memop[pos];
		if (ir_no_alias != get_alias_relation(current_ir_graph, value->address, value->mode,
		                                      op->value.address, op->value.mode)) {
			rbitset_clear(env.curr_set, pos);
			bl->id_2_memop[pos] = NULL;
 * Add the value of a memop to the current set.
 * @param bl  the block
 * @param op  the memory op
static void add_memop(block_t *bl, memop_t *op) {
	rbitset_set(env.curr_set, op->value.id);
	bl->id_2_memop[op->value.id] = op;
 * Add the value of a memop to the avail_out set.
 * @param bl  the block
 * @param op  the memory op
static void add_memop_avail(block_t *bl, memop_t *op) {
	rbitset_set(bl->avail_out, op->value.id);
	bl->id_2_memop_avail[op->value.id] = op;
 * Find a definition for a value in the given block.
 * @param block  the block
 * @param value  the value
 * @return the definition
static ir_node *find_definition(ir_node *block, const value_t *value) {
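	/* Sketch of the logic below: if the block defining the value dominates
	   the requesting block, the value can be reused directly; otherwise a
	   Phi over the definitions found in the predecessor blocks is built
	   recursively. */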
	ir_node *def_bl = get_nodes_block(value->value);
	if (block_dominates(def_bl, block))
	/* no: we need a new Phi */
	n = get_Block_n_cfgpreds(block);
		block = get_Block_cfgpred_block(block, 0);
		n     = get_Block_n_cfgpreds(block);
		mop   = find_address(get_block_entry(block), value);
	NEW_ARR_A(ir_node *, in, n);
	for (i = n - 1; i >= 0; --i) {
		ir_node *pred_bl = get_Block_cfgpred_block(block, i);
		mop   = find_address(get_block_entry(pred_bl), value);
		in[i] = find_definition(pred_bl, &mop->value);
	phi = new_r_Phi(current_ir_graph, block, n, in, value->mode);
	DB((dbg, LEVEL_2, "Created new Phi %+F for value %+F in block %+F\n", phi, value->value, block));
 * Mark a Load memop to be replaced by a definition.
 * @param op  the Load memop
static void mark_replace_load(memop_t *op, ir_node *def) {
	op->flags |= FLAG_KILLED_NODE;
 * Mark a Store memop to be removed.
 * @param op  the Store memop
static void mark_remove_store(memop_t *op) {
	op->flags |= FLAG_KILLED_NODE;
#define BYTE_SIZE(x) (((x) + 7) >> 3)
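/* BYTE_SIZE(x): number of bytes needed to hold x bits, rounded up */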
 * Do forward dataflow analysis on a given block to calculate the avail_out set.
 * @param bl  the block
 * @return non-zero if the set has changed since last iteration
static int forward_avail(block_t *bl) {
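	/* Transfer sketch (matching the code below): Avail_in(bl) is the
	   intersection of the avail_out sets of all predecessors; the memops of
	   the block are then applied in forward order (Loads/Stores add their
	   address, killing memops remove possibly aliasing entries) and the
	   result becomes the new avail_out of bl. */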
	ir_node *pred    = get_Block_cfgpred_block(bl->block, 0);
	block_t *pred_bl = get_block_entry(pred);
	rbitset_cpy(env.curr_set, pred_bl->avail_out, env.rbs_size);
	n = get_Block_n_cfgpreds(bl->block);
		/* more than one predecessor: calculate the join */
		for (i = n - 1; i > 0; --i) {
			ir_node *pred    = get_Block_cfgpred_block(bl->block, i);
			block_t *pred_bl = get_block_entry(pred);
			rbitset_and(env.curr_set, pred_bl->avail_out, env.rbs_size);
		/* make sure that all values are in the map */
		for (pos = env.rbs_size - 1; pos >= 0; --pos) {
			if (! rbitset_is_set(env.curr_set, pos))
				bl->id_2_memop[pos] = NULL;
				for (i = n - 1; i >= 0; --i) {
					ir_node *pred    = get_Block_cfgpred_block(bl->block, i);
					block_t *pred_bl = get_block_entry(pred);
					if (pred_bl->id_2_memop[pos] != NULL) {
						bl->id_2_memop[pos] = pred_bl->id_2_memop[pos];
		/* only one predecessor, simply copy the map */
		memcpy(bl->id_2_memop, pred_bl->id_2_memop, env.rbs_size * sizeof(bl->id_2_memop[0]));
	dump_curr(bl, "Avail_in");
	for (op = bl->memop_forward; op != NULL; op = op->next) {
		switch (get_irn_opcode(op->node)) {
			if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
				/* do we have this already? */
				memop_t *other = find_address(bl, &op->value);
					if (is_Store(other->node)) {
						DB((dbg, LEVEL_1, "RAW %+F %+F\n", op->node, other->node));
						DB((dbg, LEVEL_1, "RAR %+F %+F\n", op->node, other->node));
					def = find_definition(bl->block, &other->value);
					mark_replace_load(op, def);
					kill_stores(bl, &op->value);
			if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
				/* do we have this store already? */
				memop_t *other = find_address(bl, &op->value);
					if (is_Store(other->node)) {
						DB((dbg, LEVEL_1, "WAW %+F %+F\n", op->node, other->node));
						mark_remove_store(other);
						/* FIXME: a Load might get freed due to this killed Store */
					} else if (other->value.value == op->value.value) {
						DB((dbg, LEVEL_1, "WAR %+F %+F\n", op->node, other->node));
						mark_remove_store(op);
						/* we overwrite the value that was loaded */
					kill_memops(bl, &op->value);
			switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
			case FLAG_KILL_LOADS|FLAG_KILL_STORES:
			case FLAG_KILL_LOADS:
			case FLAG_KILL_STORES:
	dump_curr(bl, "Avail_out");
	if (!rbitset_equal(bl->avail_out, env.curr_set, env.rbs_size)) {
		rbitset_cpy(bl->avail_out, env.curr_set, env.rbs_size);
 * Do backward dataflow analysis on a given block to calculate the antic set
 * of Loaded addresses.
 * @param bl  the block
 * @return non-zero if the set has changed since last iteration
static int backward_antic(block_t *bl) {
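	/* Transfer sketch (matching the code below): AnticL_in(bl) starts as the
	   intersection of the AnticL_in sets of all control flow successors,
	   restricted to Loads whose address is computed in a block dominating bl;
	   the memops of the block are then processed backwards: Stores and
	   killing memops remove aliasing Load addresses, Loads add their own
	   address. */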
	ir_node *succ    = get_Block_cfg_out(bl->block, 0);
	block_t *succ_bl = get_block_entry(succ);
	rbitset_cpy(env.curr_set, succ_bl->anticL_in, env.rbs_size);
	memcpy(bl->id_2_memop, succ_bl->id_2_memop, env.rbs_size * sizeof(bl->id_2_memop[0]));
	n = get_Block_n_cfg_outs(bl->block);
		for (i = n - 1; i > 0; --i) {
			ir_node *succ    = get_Block_cfg_out(bl->block, i);
			block_t *succ_bl = get_block_entry(succ);
			rbitset_and(env.curr_set, succ_bl->anticL_in, env.rbs_size);
	/* cleanup: kill those Loads whose address is not available */
	for (pos = rbitset_next(env.curr_set, pos, 1); pos != end; pos = rbitset_next(env.curr_set, pos + 1, 1)) {
		memop_t *op     = succ_bl->id_2_memop[pos];
		ir_node *ptr    = get_Load_ptr(op->node);
		ir_node *ptr_bl = get_nodes_block(ptr);
		if (!block_dominates(ptr_bl, bl->block))
			rbitset_clear(env.curr_set, pos);
	dump_curr(bl, "AnticL_out");
	for (op = bl->memop_backward; op != NULL; op = op->prev) {
		switch (get_irn_opcode(op->node)) {
			if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
			if (! (op->flags & (FLAG_KILLED_NODE|FLAG_IGNORE))) {
				/* a Store: check which memops must be killed */
				kill_memops(bl, &op->value);
			switch (op->flags & (FLAG_KILL_LOADS|FLAG_KILL_STORES)) {
			case FLAG_KILL_LOADS|FLAG_KILL_STORES:
			case FLAG_KILL_LOADS:
			case FLAG_KILL_STORES:
				kill_all_stores(bl);
	dump_curr(bl, "AnticL_in");
	if (! rbitset_equal(bl->anticL_in, env.curr_set, env.rbs_size)) {
		rbitset_cpy(bl->anticL_in, env.curr_set, env.rbs_size);
 * Replace a Load memop by an already known value.
 * @param op  the Load memop
static void replace_load(memop_t *op) {
	ir_node *load = op->node;
	ir_node *def  = skip_Id(op->replace);
		DB((dbg, LEVEL_1, "Replacing %+F by definition %+F\n", load, is_Proj(def) ? get_Proj_pred(def) : def));
		if (op->flags & FLAG_EXCEPTION) {
			/* bad: this node is unused and executed for exception only */
			DB((dbg, LEVEL_1, "Unused %+F executed for exception only ...\n", load));
		DB((dbg, LEVEL_1, "Killing unused %+F\n", load));
	for (i = get_irn_n_outs(load) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(load, i);
		switch (get_Proj_proj(proj)) {
			exchange(proj, get_Load_mem(load));
			mode = get_irn_mode(proj);
			if (get_irn_mode(def) != mode) {
				dbg_info *db    = get_irn_dbg_info(load);
				ir_node  *block = get_nodes_block(proj);
				def = new_rd_Conv(db, current_ir_graph, block, def, mode);
			exchange(proj, def);
		case pn_Load_X_except:
			exchange(proj, new_Bad());
		case pn_Load_X_regular:
			exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(load)));
			panic("Unknown Proj from Load");
 * Remove a Store memop.
 * @param op  the Store memop
static void remove_store(memop_t *op) {
	ir_node *store = op->node;
	DB((dbg, LEVEL_1, "Removing %+F\n", store));
	for (i = get_irn_n_outs(store) - 1; i >= 0; --i) {
		ir_node *proj = get_irn_out(store, i);
		switch (get_Proj_proj(proj)) {
			exchange(proj, get_Store_mem(store));
		case pn_Store_X_except:
			exchange(proj, new_Bad());
		case pn_Store_X_regular:
			exchange(proj, new_r_Jmp(current_ir_graph, get_nodes_block(store)));
			panic("Unknown Proj from Store");
 * Do all necessary replacements for a given block.
 * @param bl  the block
static void do_replacements(block_t *bl) {
	for (op = bl->memop_forward; op != NULL; op = op->next) {
		if (op->flags & FLAG_KILLED_NODE) {
			switch (get_irn_opcode(op->node)) {
 * Calculate the Avail_out sets for all basic blocks.
static void calcAvail(void) {
	/* calculate avail_out */
	DB((dbg, LEVEL_2, "Calculate Avail_out\n"));
		DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
		/* over all blocks in reverse post order, skip the start block */
		for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
			need_iter |= forward_avail(bl);
	} while (need_iter);
	/* copy the content of the id_2_memop map into the id_2_memop_avail map
	   as it must be preserved for later use */
	for (bl = env.forward->forward_next; bl != NULL; bl = bl->forward_next) {
		memop_t **t = bl->id_2_memop_avail;
		bl->id_2_memop_avail = bl->id_2_memop;
	DB((dbg, LEVEL_2, "Get avail set after %d iterations\n\n", i));
 * Calculate the Antic_in sets for all basic blocks.
static void calcAntic(void) {
	/* calculate antic_in */
	DB((dbg, LEVEL_2, "Calculate Antic_in\n"));
		DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
		/* over all blocks in reverse post order */
		for (bl = env.backward->backward_next; bl != NULL; bl = bl->backward_next) {
			need_iter |= backward_antic(bl);
	} while (need_iter);
	DB((dbg, LEVEL_2, "Get anticipated Load set after %d iterations\n", i));
 * Return the node representing the first memory in a block.
 * @param bl  the block
static ir_node *find_first_memory(block_t *bl) {
		if (bl->memop_forward != NULL) {
			return bl->memop_forward->node;
		/* if there is NO memory in this block, go to the post dominator */
		bl = get_block_entry(get_Block_ipostdom(bl->block));
 * Return the node representing the last memory in a block.
 * @param bl  the block
static ir_node *find_last_memory(block_t *bl) {
		if (bl->memop_backward != NULL) {
			return bl->memop_backward->mem;
		/* if there is NO memory in this block, go to the dominator */
		bl = get_block_entry(get_Block_idom(bl->block));
 * Reroute all memory users of old memory
 * to a new memory IR-node.
 * @param omem  the old memory IR-node
 * @param nmem  the new memory IR-node
static void reroute_all_mem_users(ir_node *omem, ir_node *nmem) {
	for (i = get_irn_n_outs(omem) - 1; i >= 0; --i) {
		ir_node *user = get_irn_out_ex(omem, i, &n_pos);
		set_irn_n(user, n_pos, nmem);
	/* all edges previously point to omem now point to nmem */
	nmem->out = omem->out;
 * Reroute memory users of old memory that are dominated by a given block
 * to a new memory IR-node.
 * @param omem     the old memory IR-node
 * @param nmem     the new memory IR-node
 * @param pass_bl  the block the memory must pass
static void reroute_mem_through(ir_node *omem, ir_node *nmem, ir_node *pass_bl) {
	int             i, j, n = get_irn_n_outs(omem);
	ir_def_use_edge *edges  = NEW_ARR_D(ir_def_use_edge, &env.obst, n + 1);
	for (i = j = 0; i < n; ++i) {
		ir_node *user   = get_irn_out_ex(omem, i, &n_pos);
		ir_node *use_bl = get_nodes_block(user);
			use_bl = get_Block_cfgpred_block(use_bl, n_pos);
		if (block_dominates(pass_bl, use_bl)) {
			/* found a user that is dominated */
			edges[j].pos = n_pos;
			edges[j].use = user;
			set_irn_n(user, n_pos, nmem);
	/* Modify the out structure: we create a new out edge array on our
	   temporary obstack here. This should be no problem, as we invalidate the edges
	   at the end anyway. */
	/* first entry is used for the length */
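/**
 * Walker: insert partially redundant Loads (a sketch of the intent, inferred
 * from the code below): for every Load anticipated in this block, check all
 * predecessors; where its address is not yet available, a copy of the Load
 * is created at the end of that predecessor, and the values are merged by a
 * Phi that replaces the original Load.
 *
 * @param block  the current block
 * @param ctx    pointer to an int flag, presumably set when new Loads were
 *               created (judging by insert_Loads_upwards())
 */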
static void insert_Load(ir_node *block, void *ctx) {
	int *new_stuff = ctx;
	int i, n = get_Block_n_cfgpreds(block);
	unsigned end = env.n_mem_ops * 2 - 1;
	bl = get_block_entry(block);
	for (pos = rbitset_next(bl->anticL_in, pos, 1); pos != end; pos = rbitset_next(bl->anticL_in, pos + 1, 1)) {
		memop_t *op = bl->id_2_memop[pos];
		assert(is_Load(op->node));
		if (op->flags & FLAG_KILLED_NODE)
		for (i = n - 1; i >= 0; --i) {
			ir_node *pred    = get_Block_cfgpred_block(block, i);
			block_t *pred_bl = get_block_entry(pred);
			memop_t *e       = find_address_avail(pred_bl, &op->value);
				ir_node *block = get_nodes_block(op->value.address);
				if (! block_dominates(block, pred)) {
					/* cannot place a copy here */
				pred_bl->avail = NULL;
			ir_mode *mode = op->value.mode;
			NEW_ARR_A(ir_node *, in, n);
			for (i = n - 1; i >= 0; --i) {
				ir_node *pred    = get_Block_cfgpred_block(block, i);
				block_t *pred_bl = get_block_entry(pred);
				if (pred_bl->avail == NULL) {
					/* create a new Load here to make it fully redundant */
					dbg_info *db       = get_irn_dbg_info(op->node);
					ir_node  *last_mem = find_last_memory(pred_bl);
					ir_node  *load, *def;
					assert(last_mem != NULL);
					load = new_rd_Load(db, current_ir_graph, pred, last_mem, op->value.address, mode, cons_none);
					def  = new_r_Proj(current_ir_graph, pred, load, mode, pn_Load_res);
					DB((dbg, LEVEL_1, "Created new %+F for partially redundant %+F\n", load, op->node));
					new_op                = alloc_memop(load);
					new_op->mem           = new_r_Proj(current_ir_graph, pred, load, mode_M, pn_Load_M);
					new_op->value.address = op->value.address;
					new_op->value.id      = op->value.id;
					new_op->value.mode    = mode;
					new_op->value.value   = def;
					new_op->prev            = pred_bl->memop_backward;
					pred_bl->memop_backward = new_op;
					if (get_nodes_block(last_mem) == pred) {
						/* We have added a new last memory op in the pred block.
						   If pred already had a last mem, reroute all memory
						   users to the new one. */
						reroute_all_mem_users(last_mem, new_op->mem);
						/* reroute only memory going through the pred block */
						reroute_mem_through(last_mem, new_op->mem, pred);
					/* we added this Load at the end, so it will be avail anyway */
					add_memop_avail(pred_bl, new_op);
					pred_bl->avail = new_op;
				in[i] = pred_bl->avail->value.value;
			phi = new_r_Phi(current_ir_graph, block, n, in, mode);
			mark_replace_load(op, phi);
			if (get_nodes_block(op->node) == block) {
				/* The redundant node was in the current block:
				   In that case, DO NOT update avail_out. If it was NOT
				   avail although it is executed in this block, it is killed by a later
				   instruction. */
				/* The redundant node is NOT in the current block and anticipated.
				   This can only happen IFF nothing kills the Load in the current block,
				   so it will be avail in the next iteration.
				add_memop_avail(bl, op);
				/* TODO propagate it downwards */
 * Insert Loads upwards.
static void insert_Loads_upwards(void) {
	/* insert Loads as long as partially redundant ones are found */
	DB((dbg, LEVEL_2, "Inserting Loads"));
		DB((dbg, LEVEL_2, "Iteration %d:\n=========\n", i));
		dom_tree_walk_irg(current_ir_graph, insert_Load, NULL, &need_iter);
	} while (need_iter);
	DB((dbg, LEVEL_2, "Finished Load inserting after %d iterations\n", i));
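/**
 * Run the dataflow driven Load/Store optimization on a graph.
 *
 * @param irg  the graph to optimize
 *
 * @return non-zero if the graph was changed
 */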
int opt_ldst(ir_graph *irg) {
	ir_graph *rem = current_ir_graph;
	current_ir_graph = irg;
	FIRM_DBG_REGISTER(dbg, "firm.opt.ldst");
	DB((dbg, LEVEL_1, "\nDoing Load/Store optimization on %+F\n", irg));
	/* we need landing pads */
	remove_critical_cf_edges(irg);
	if (get_opt_alias_analysis()) {
		assure_irg_entity_usage_computed(irg);
		assure_irp_globals_entity_usage_computed();
	obstack_init(&env.obst);
	ir_nodemap_init(&env.adr_map);
	env.backward    = NULL;
	env.curr_adr_id = 0;
	env.start_bl    = get_irg_start_block(irg);
	assure_irg_outs(irg);
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
	/* first step: allocate block entries: produces an
	   inverse post-order list for the CFG */
	irg_out_block_walk(get_irg_start_block(irg), NULL, prepare_blocks, NULL);
	/* second step: find and sort all memory ops */
	walk_memory_irg(irg, collect_memops, NULL, NULL);
	if (env.n_mem_ops == 0) {
	/* create the backward links */
	irg_block_walk_graph(irg, NULL, collect_backward, NULL);
	/* check that we really start with the start / end block */
	assert(env.forward->block  == env.start_bl);
	assert(env.backward->block == get_irg_end_block(irg));
	/* create address sets: we know that 2 * n_mem_ops - 1 is an upper bound for all possible addresses */
	env.rbs_size = 2 * env.n_mem_ops;
	/* create the current set */
	env.curr_set = rbitset_obstack_alloc(&env.obst, env.rbs_size);
	rbitset_set(env.curr_set, env.rbs_size - 1);
	for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
		/* set sentinel bits */
		bl->avail_out = rbitset_obstack_alloc(&env.obst, env.rbs_size);
		rbitset_set(bl->avail_out, env.rbs_size - 1);
		bl->id_2_memop_avail = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
		memset(bl->id_2_memop_avail, 0, env.rbs_size * sizeof(bl->id_2_memop[0]));
		bl->anticL_in = rbitset_obstack_alloc(&env.obst, env.rbs_size);
		rbitset_set(bl->anticL_in, env.rbs_size - 1);
		bl->id_2_memop = NEW_ARR_D(memop_t *, &env.obst, env.rbs_size);
		memset(bl->id_2_memop, 0, env.rbs_size * sizeof(bl->id_2_memop[0]));
	// dump_block_list(&env);
	insert_Loads_upwards();
	/* over all blocks in reverse post order */
	for (bl = env.forward; bl != NULL; bl = bl->forward_next) {
		do_replacements(bl);
	/* not only invalidate but free them. We might have allocated new out arrays
	   on our obstack, which would be deleted together with it anyway. */
	set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
	ir_nodemap_destroy(&env.adr_map);
	obstack_free(&env.obst, NULL);
	current_ir_graph = rem;
	return env.changed != 0;