2 * Copyright (C) 1995-2007 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief parallelizing Load/Store optimisation
23 * @author Christoph Mallon
30 #include "iroptimize.h"
41 #include "irnodeset.h"
/* When defined, PlaceLoad reuses an earlier Load of the same address and
 * mode instead of emitting a new memory dependency (see PlaceLoad). */
46 #define OPTIMISE_LOAD_AFTER_LOAD
/* Marker for unreachable/unimplemented cases.
 * NOTE(review): the trailing semicolon is baked into the macro; an extra
 * ';' at expansion sites can break if/else chains — verify call sites. */
49 #define UNIMPLEMENTED abort();
/* Debug module handle for the DB() tracing macros below. */
52 DEBUG_ONLY(static firm_dbg_module_t *dbg);
/* Obstack holding all per-run allocations (addrs array, per-block alias
 * sets); freed wholesale in opt_ldst2 via obstack_free(). */
55 static struct obstack obst;
/* Number of distinct load/store addresses found by CollectAddresses. */
56 static size_t count_addrs;
/* Array of the distinct address nodes; each address node's link field
 * caches its index into this array (set in CollectAddresses). */
57 static ir_node** addrs;
/* irg_walk_graph() callback: inserts the address operand of every Load and
 * Store node into the ir_nodeset_t passed as env.  (NOTE(review): this view
 * of the file is missing lines; the Load test and early-return branch are
 * not visible here.) */
60 static void AddressCollector(ir_node* node, void* env)
62 ir_nodeset_t* addrs_set = env;
65 addr = get_Load_ptr(node);
66 } else if (is_Store(node)) {
67 addr = get_Store_ptr(node);
	/* The set deduplicates: each distinct address appears once. */
71 ir_nodeset_insert(addrs_set, addr);
75 /* Collects all unique addresses used by load and store nodes of a graph and
76 * puts them into an array for later use */
77 static void CollectAddresses(ir_graph* irg)
79 ir_nodeset_t addrs_set;
81 ir_nodeset_init(&addrs_set);
	/* AddressCollector fills the set with one entry per distinct address. */
82 irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);
	/* Publish the count in the file-scope global used by the rest of the pass. */
84 count_addrs = ir_nodeset_size(&addrs_set);
85 DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (unsigned int)count_addrs));
86 if (count_addrs != 0) {
87 ir_nodeset_iterator_t addr_iter;
	/* Copy the set into the global addrs array, allocated on the pass obstack. */
90 addrs = NEW_ARR_D(ir_node*, &obst, count_addrs);
91 ir_nodeset_iterator_init(&addr_iter, &addrs_set);
92 for (i = 0; i < count_addrs; i++) {
93 ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
	/* Cache the array index in the address node's link field;
	 * PlaceLoad/PlaceStore read it back with get_irn_link(). */
95 set_irn_link(addr, (void *)i);
97 DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
/* Block-walker callback: attaches to each block an array of count_addrs
 * empty node sets (one "execute-after" set per tracked address), allocated
 * on the pass obstack and reachable via the block's link field. */
103 static void AliasSetAdder(ir_node* block, void* env)
105 ir_nodeset_t* alias_set;
108 alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
109 for (i = 0; i < count_addrs; i++) {
110 ir_nodeset_init(&alias_set[i]);
	/* Later code fetches this array with get_irn_link(block). */
112 set_irn_link(block, alias_set);
/* Seeds the start block: every address's execute-after set initially
 * contains only the graph's initial memory, and the start block is marked
 * visited so the memory walk treats it as already processed. */
116 static void SetStartAddressesTop(ir_graph* irg)
118 ir_node* initial_mem;
119 ir_node* start_block;
120 ir_nodeset_t* start_addrs;
123 initial_mem = get_irg_initial_mem(irg);
124 start_block = get_irg_start_block(irg);
	/* Per-address set array attached by AliasSetAdder. */
125 start_addrs = get_irn_link(start_block);
126 for (i = 0; i < count_addrs; i++) {
127 ir_nodeset_insert(&start_addrs[i], initial_mem);
	/* Prevent WalkMem from descending into the start block again. */
129 mark_Block_block_visited(start_block);
/* Block-walker callback: destroys the per-address node sets attached to a
 * block by AliasSetAdder (the array itself lives on the obstack and is
 * freed with it). */
133 static void AliasSetDestroyer(ir_node* block, void* env)
135 ir_nodeset_t* alias_set = get_irn_link(block);
138 for (i = 0; i < count_addrs; i++) {
139 ir_nodeset_destroy(&alias_set[i]);
/* Returns the alias relation between the access (addr, mode) and the
 * memory operation 'other' (a Load/Store, possibly wrapped in a Proj).
 * Used by PlaceStore to prune execute-after sets. */
144 static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, ir_node* other)
	/* Strip a memory/result Proj to reach the underlying Load/Store. */
149 if (is_Proj(other)) other = get_Proj_pred(other);
151 if (is_Load(other)) {
152 other_addr = get_Load_ptr(other);
153 } else if (is_Store(other)) {
154 other_addr = get_Store_ptr(other);
	/* NOTE(review): get_irn_mode() of a Load/Store node is its node mode,
	 * not the accessed value's mode (likely mode_T for Load) — verify this
	 * is the intended argument for get_alias_relation(). */
159 other_mode = get_irn_mode(other);
160 return get_alias_relation(irg, addr, mode, other_addr, other_mode);
/* Turns a non-empty set of memory nodes into a single memory dependency:
 * the sole element if the set has size one, otherwise a new Sync node in
 * 'block' over all elements. */
164 static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
166 size_t set_size = ir_nodeset_size(after_set);
167 ir_nodeset_iterator_t iter;
	/* Callers must never pass an empty set — there is nothing to sync. */
169 assert(set_size != 0);
171 ir_nodeset_iterator_init(&iter, after_set);
	/* Single element: no Sync needed, return it directly. */
173 return ir_nodeset_iterator_next(&iter);
	/* Multiple elements: gather them into an on-stack array for new_r_Sync. */
178 NEW_ARR_A(ir_node*, in, set_size);
179 for (i = 0; i < set_size; i++) {
180 in[i] = ir_nodeset_iterator_next(&iter);
182 return new_r_Sync(irg, block, set_size, in);
/* Per-address singly linked lists (chained through the Phi nodes' link
 * fields) of memory Phis whose Unknown inputs still need patching in
 * FinalisePhis; allocated/freed in Detotalise. */
187 static ir_node** unfinished_phis;
/* For a control-flow merge point: builds, for every tracked address, a new
 * memory Phi in 'block' whose i-th input is the Sync of the predecessor
 * block's execute-after set for that address (or Unknown if that set is
 * still empty).  New Phis are chained into unfinished_phis and become the
 * block's sole execute-after entry for their address. */
190 static void PlaceMemPhis(ir_graph* irg, ir_node* block, ir_node* phi)
193 size_t block_n_preds = get_Block_n_cfgpreds(block);
194 ir_nodeset_t* thissets;
199 thissets = get_irn_link(block);
200 NEW_ARR_A(ir_node*, in, block_n_preds);
	/* One new memory Phi per tracked address. */
201 for (j = 0; j < count_addrs; j++) {
204 for (i = 0; i < block_n_preds; i++) {
205 ir_node* pred_block = get_nodes_block(get_Phi_pred(phi, i)); // TODO get_Block_cfgpred_block(block, i);
206 ir_nodeset_t* predsets = get_irn_link(pred_block);
207 size_t predset_size = ir_nodeset_size(&predsets[j]);
209 if (predset_size == 0) {
	/* Predecessor not processed yet: placeholder, fixed in FinalisePhis. */
210 in[i] = new_r_Unknown(irg, mode_M);
213 in[i] = GenerateSync(irg, pred_block, &predsets[j]);
216 new_phi = new_r_Phi(irg, block, block_n_preds, in, mode_M);
	/* Push onto the unfinished list for address j via the link field. */
218 set_irn_link(new_phi, unfinished_phis[j]);
219 unfinished_phis[j] = new_phi;
221 ir_nodeset_insert(&thissets[j], new_phi);
/* Forward declaration: WalkMem and WalkMemPhi are mutually recursive. */
226 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
/* Processes a memory Phi: first walks every incoming memory chain, then
 * replaces the single Phi by per-address memory Phis (PlaceMemPhis) and
 * removes the original by exchanging it with Bad. */
229 static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
231 size_t n = get_Phi_n_preds(phi);
234 for (i = 0; i < n; i++) {
235 WalkMem(irg, get_Phi_pred(phi, i), block);
238 PlaceMemPhis(irg, block, phi);
	/* The original totalised memory Phi is now dead. */
239 exchange(phi, new_Bad());
/* Rewires a Load's memory input to depend only on the operations in the
 * execute-after set of its own address (a single node or a Sync), then
 * inserts the Load's memory output into the set of every address it may
 * alias.  With OPTIMISE_LOAD_AFTER_LOAD, an identical earlier Load (same
 * address and mode) replaces this one entirely. */
243 static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
245 ir_node* addr = get_Load_ptr(load);
	/* Index into addrs[], cached in the address node's link by CollectAddresses. */
246 size_t addr_idx = (size_t)get_irn_link(addr);
247 ir_nodeset_t* interfere_sets = get_irn_link(block);
248 ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
249 size_t size = ir_nodeset_size(interfere_set);
250 ir_nodeset_iterator_t interfere_iter;
254 ir_nodeset_iterator_init(&interfere_iter, interfere_set);
	/* Single dependency: use it directly as the Load's memory input. */
256 ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
257 assert(!is_Proj(after) || !is_Load(get_Proj_pred(after)));
258 DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
259 set_Load_mem(load, after);
	/* Multiple dependencies: collect them, possibly short-circuiting via
	 * load-after-load reuse, then Sync the remainder. */
266 NEW_ARR_A(ir_node*, after_set, size);
268 while ((mem = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
270 ir_node* pred = get_Proj_pred(mem);
272 #ifdef OPTIMISE_LOAD_AFTER_LOAD
	/* A previous Load of the same address and mode subsumes this one. */
273 if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
274 exchange(load, pred);
281 DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, mem));
282 after_set[i++] = mem;
286 after = after_set[0];
288 after = new_r_Sync(irg, block, i, after_set);
290 set_Load_mem(load, after);
	/* Propagate: this Load's memory result joins the execute-after set of
	 * every address it cannot be proven not to alias. */
293 for (i = 0; i < count_addrs; i++) {
294 ir_mode* mode = get_Load_mode(load);
295 ir_node* other_addr = addrs[i];
296 ir_mode* other_mode = mode; // XXX second mode is nonsense
297 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
299 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
300 if (rel == no_alias) {
303 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));
305 ir_nodeset_insert(&interfere_sets[i], memory);
/* Rewires a Store's memory input to a Sync over its address's
 * execute-after set, then updates every potentially aliasing address's
 * set: entries that alias the Store are removed (the Store now orders
 * them) and the Store's memory output is inserted. */
310 static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
312 ir_node* addr = get_Store_ptr(store);
	/* Index into addrs[], cached in the address node's link field. */
313 size_t addr_idx = (size_t)get_irn_link(addr);
314 ir_nodeset_t* interfere_sets = get_irn_link(block);
315 ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
319 after = GenerateSync(irg, block, interfere_set);
320 set_Store_mem(store, after);
322 for (i = 0; i < count_addrs; i++) {
323 ir_nodeset_iterator_t interfere_iter;
324 ir_mode* mode = get_irn_mode(get_Store_value(store));
325 ir_node* other_addr = addrs[i];
326 ir_mode* other_mode = mode; // XXX second mode is nonsense
327 ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
330 DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
331 if (rel == no_alias) {
334 DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));
	/* Drop set entries now ordered behind this Store. */
336 ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
337 while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
338 if (AliasTest(irg, addr, mode, other_node) != no_alias) {
339 DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
340 ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
344 ir_nodeset_insert(&interfere_sets[i], memory);
/* Recursively walks the (totalised) memory chain backwards from 'node',
 * detotalising each memory operation: Loads/Stores get per-address
 * dependencies (PlaceLoad/PlaceStore); other memory users fall back to
 * syncing against every tracked address.  Returns nonzero to signal a
 * block change to the caller, which then merges/copies the predecessor
 * block's per-address sets into the current block's. */
349 static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
351 int block_change = 0;
352 ir_node* block = get_nodes_block(node);
354 ir_node* memory = node;
355 ir_nodeset_t* addr_sets;
357 if (block != last_block) {
358 DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
360 if (Block_not_block_visited(block)) {
361 mark_Block_block_visited(block);
	/* Already-visited block: its sets are (being) computed elsewhere. */
363 DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
	/* Strip a memory Proj to reach the producing operation. */
369 if (is_Proj(node)) node = get_Proj_pred(node);
372 WalkMemPhi(irg, block, node);
374 } else if (is_Sync(node)) {
376 } else if (is_Return(node)) {
377 pred = get_Return_mem(node);
379 pred = get_fragile_op_mem(node);
	/* First finish everything upstream of this node. */
382 if (WalkMem(irg, pred, block)) {
383 // There was a block change
384 size_t block_arity = get_Block_n_cfgpreds(block);
386 DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
387 if (block_arity == 1) {
388 // Just one predecessor, inherit its alias sets
389 ir_node* pred_block = get_nodes_block(pred);
390 ir_nodeset_t* predsets = get_irn_link(pred_block);
391 ir_nodeset_t* thissets = get_irn_link(block);
394 DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));
396 if (ir_nodeset_size(&predsets[0]) == 0) {
	/* Loop edge: predecessor sets are still empty, so place
	 * one-input Phis with Unknown and fix them up in FinalisePhis. */
399 DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
400 assert(!Block_not_block_visited(pred_block));
402 unknown = new_r_Unknown(irg, mode_M);
403 for (i = 0; i < count_addrs; i++) {
404 ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
405 DB((dbg, LEVEL_3, "===> Placing unfinished %+F for %+F in %+F\n", phi_unk, addrs[i], block));
406 set_irn_link(phi_unk, unfinished_phis[i]);
407 unfinished_phis[i] = phi_unk;
408 ir_nodeset_insert(&thissets[i], phi_unk);
	/* Predecessor finished: copy its sets element by element. */
411 for (i = 0; i < count_addrs; i++) {
412 ir_nodeset_iterator_t prediter;
415 ir_nodeset_iterator_init(&prediter, &predsets[i]);
416 while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
417 ir_nodeset_insert(&thissets[i], addr);
424 DB((dbg, LEVEL_3, "===> Detotalising %+F\n", node));
426 addr_sets = get_irn_link(block);
429 PlaceLoad(irg, block, node, memory);
430 } else if (is_Store(node)) {
431 PlaceStore(irg, block, node, memory);
	/* Fallback: an opaque memory op (e.g. a Call) may touch anything —
	 * it must execute after every tracked address's pending operations. */
433 ir_nodeset_t sync_set;
437 DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));
439 ir_nodeset_init(&sync_set);
440 for (i = 0; i < count_addrs; i++) {
441 ir_nodeset_iterator_t iter;
444 ir_nodeset_iterator_init(&iter, &addr_sets[i]);
445 while ((mem = ir_nodeset_iterator_next(&iter)) != NULL) {
446 ir_nodeset_insert(&sync_set, mem);
450 after = GenerateSync(irg, block, &sync_set);
451 set_irn_n(node, 0, after); // XXX unnice way to set the memory input
	/* This op now dominates all addresses: reset every set to just it. */
453 for (i = 0; i < count_addrs; i++) {
454 ir_nodeset_iterator_t iter;
455 ir_nodeset_iterator_init(&iter, &addr_sets[i]);
456 while (ir_nodeset_iterator_next(&iter) != NULL) {
457 ir_nodeset_remove_iterator(&addr_sets[i], &iter);
459 ir_nodeset_insert(&addr_sets[i], memory);
/* Second pass over the unfinished_phis lists: now that every block's
 * per-address sets are complete, replace each Phi's Unknown inputs with
 * the Sync of the corresponding predecessor block's set.  One-input Phis
 * are dissolved entirely via exchange(). */
467 static void FinalisePhis(ir_graph* irg)
471 for (i = 0; i < count_addrs; i++) {
	/* The list for address i is threaded through the Phis' link fields. */
475 for (phi = unfinished_phis[i]; phi != NULL; phi = next_phi) {
476 ir_node* block = get_nodes_block(phi);
477 size_t block_n_preds = get_Block_n_cfgpreds(block);
479 next_phi = get_irn_link(phi);
481 DB((dbg, LEVEL_4, "===> Finialising phi %+F in %+F\n", phi, block));
483 if (block_n_preds == 1) {
	/* Degenerate one-input Phi: replace it by the real dependency. */
484 ir_node* pred_block = get_Block_cfgpred_block(block, 0);
485 ir_nodeset_t* pred_sets = get_irn_link(pred_block);
486 ir_node* after = GenerateSync(irg, pred_block, &pred_sets[i]);
488 assert(is_Unknown(get_Phi_pred(phi, 0)));
489 exchange(phi, after);
	/* Multi-input Phi: patch only the inputs still set to Unknown. */
494 NEW_ARR_A(ir_node*, in, block_n_preds);
495 for (j = 0; j < block_n_preds; j++) {
496 ir_node* pred_block = get_Block_cfgpred_block(block, j);
497 ir_nodeset_t* pred_sets = get_irn_link(pred_block);
499 if (is_Unknown(get_Phi_pred(phi, j))) {
500 set_Phi_pred(phi, j, GenerateSync(irg, pred_block, &pred_sets[i]));
/* Driver for the detotalisation: starts one backwards memory walk per
 * Return feeding the end block, then frees the unfinished-Phi bookkeeping.
 * (NOTE(review): the FinalisePhis call is not visible in this extract;
 * it is presumably made in the elided lines before xfree.) */
509 static void Detotalise(ir_graph* irg)
511 ir_node* end_block = get_irg_end_block(irg);
512 size_t npreds = get_Block_n_cfgpreds(end_block);
	/* One (initially empty) unfinished-Phi list per tracked address. */
515 unfinished_phis = xmalloc(sizeof(*unfinished_phis) * count_addrs);
516 for (i = 0; i < count_addrs; i++) {
517 unfinished_phis[i] = NULL;
520 for (i = 0; i < npreds; i++) {
521 ir_node* pred = get_Block_cfgpred(end_block, i);
	/* normalize_one_return (see opt_ldst2) guarantees Return preds. */
522 assert(is_Return(pred));
523 DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
524 WalkMem(irg, pred, NULL);
528 xfree(unfinished_phis);
/* Recursively flattens a Sync's predecessors into 'preds': nested Syncs
 * are expanded, all other nodes are inserted directly.  The set also
 * deduplicates repeated predecessors. */
532 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
534 size_t n = get_Sync_n_preds(sync);
537 for (i = 0; i < n; i++) {
538 ir_node* pred = get_Sync_pred(sync, i);
540 AddSyncPreds(preds, pred);
542 ir_nodeset_insert(preds, pred);
/* Graph-walker callback: normalises a Sync node by flattening nested
 * Syncs and removing duplicate inputs.  The node is rewritten in place
 * (set_irn_in) only when the flattened predecessor count differs. */
548 static void NormaliseSync(ir_node* node, void* env)
551 ir_nodeset_iterator_t iter;
556 if (!is_Sync(node)) return;
558 ir_nodeset_init(&preds);
559 AddSyncPreds(&preds, node);
561 count_preds = ir_nodeset_size(&preds);
	/* Equal size means no nesting and no duplicates — leave the node alone. */
562 if (count_preds != get_Sync_n_preds(node)) {
563 NEW_ARR_A(ir_node*, in, count_preds);
564 ir_nodeset_iterator_init(&iter, &preds);
565 for (i = 0; i < count_preds; i++) {
566 ir_node* pred = ir_nodeset_iterator_next(&iter);
567 assert(pred != NULL);
570 set_irn_in(node, count_preds, in);
573 ir_nodeset_destroy(&preds);
/* Entry point of the parallelising load/store optimisation: collects the
 * graph's unique addresses, attaches per-block alias sets, detotalises
 * the memory chain (elided in this extract, see Detotalise), then cleans
 * up Projs and Syncs and runs local optimisation. */
577 void opt_ldst2(ir_graph* irg)
579 FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
580 DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
	/* Detotalise asserts that all end-block preds are Returns. */
582 normalize_one_return(irg);
583 dump_ir_block_graph(irg, "-prefluffig");
	/* XXX: alias analysis is forced on unconditionally here. */
587 if (1 /* XXX */ || get_opt_alias_analysis()) {
588 assure_irg_address_taken_computed(irg);
589 assure_irp_globals_address_taken_computed();
593 CollectAddresses(irg);
	/* No memory accesses — nothing to do. */
594 if (count_addrs == 0) return;
596 irg_block_walk_graph(irg, AliasSetAdder, NULL, NULL);
597 inc_irg_block_visited(irg);
598 SetStartAddressesTop(irg);
600 dump_ir_block_graph(irg, "-fluffig");
	/* Tear down per-block sets and release all obstack allocations. */
602 irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
603 obstack_free(&obst, NULL);
605 normalize_proj_nodes(irg);
	/* Run twice: optimisation may expose new nested/duplicate Syncs. */
606 irg_walk_graph(irg, NormaliseSync, NULL, NULL);
607 optimize_graph_df(irg);
608 irg_walk_graph(irg, NormaliseSync, NULL, NULL);
609 dump_ir_block_graph(irg, "-postfluffig");