-#include <stdint.h>
-#include "array.h"
+/*
+ * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
+ *
+ * This file is part of libFirm.
+ *
+ * This file may be distributed and/or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation and appearing in the file LICENSE.GPL included in the
+ * packaging of this file.
+ *
+ * Licensees holding valid libFirm Professional Edition licenses may use
+ * this file in accordance with the libFirm Commercial License.
+ * Agreement provided with the Software.
+ *
+ * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+ * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
+/**
+ * @file
+ * @brief parallelizing Load/Store optimisation
+ * @author Christoph Mallon
+ * @version $Id: $
+ */
+#include "config.h"
+
+#include "iroptimize.h"
+
+#include "array_t.h"
#include "debug.h"
#include "ircons.h"
#include "irgraph.h"
#include "irmemory.h"
#include "irnode.h"
#include "irnodeset.h"
-#include "ldst2.h"
#include "obst.h"
-#include "return.h"
-
+#include "irdump.h"
+#include "irflag_t.h"
+#include "irprintf.h"
+#if +0
#define OPTIMISE_LOAD_AFTER_LOAD
irg_walk_graph(irg, AddressCollector, NULL, &addrs_set);
count_addrs = ir_nodeset_size(&addrs_set);
- DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (uint)count_addrs));
+ DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (unsigned int)count_addrs));
if (count_addrs != 0) {
ir_nodeset_iterator_t addr_iter;
size_t i;
for (i = 0; i < count_addrs; i++) {
ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
assert(addr != NULL);
- set_irn_link(addr, (void*)(uintptr_t)i);
+ set_irn_link(addr, (void *)i);
addrs[i] = addr;
DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
}
{
ir_nodeset_t* alias_set;
size_t i;
+ (void) env;
alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
for (i = 0; i < count_addrs; i++) {
{
ir_nodeset_t* alias_set = get_irn_link(block);
size_t i;
+ (void) env;
for (i = 0; i < count_addrs; i++) {
ir_nodeset_destroy(&alias_set[i]);
} else if (is_Store(other)) {
other_addr = get_Store_ptr(other);
} else {
- return may_alias;
+ return ir_may_alias;
}
other_mode = get_irn_mode(other);
}
-static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
+/**
+ * qsort() comparator: orders two ir_node pointers by their node index so
+ * that the inputs of a generated Sync are in deterministic order.
+ * NOTE(review): assumes the index difference fits into an int — if
+ * get_irn_idx() returns an unsigned type, very large differences could
+ * wrap; confirm against the irnode API.
+ */
+static int in_cmp(void const* va, void const* vb)
+{
+	ir_node const* const a = *(ir_node const*const*)va;
+	ir_node const* const b = *(ir_node const*const*)vb;
+	return get_irn_idx(a) - get_irn_idx(b);
+}
+/**
+ * Produce a single memory node representing all nodes in after_set.
+ *
+ * If the set contains exactly one node, return it directly; otherwise
+ * build a new Sync in the given block whose inputs are the set elements,
+ * sorted by node index (in_cmp) so construction is deterministic.
+ * after_set must not be empty (asserted).
+ */
-static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
+static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
{
-	size_t n = get_Phi_n_preds(phi);
-	size_t i;
-	size_t j;
-	ir_node** in;
-	ir_nodeset_t* thissets;
+	size_t set_size = ir_nodeset_size(after_set);
+	ir_nodeset_iterator_t iter;
-	for (i = 0; i < n; i++) {
-		WalkMem(irg, get_Phi_pred(phi, i), block);
+	assert(set_size != 0);
+
+	ir_nodeset_iterator_init(&iter, after_set);
+	if (set_size == 1) {
+		return ir_nodeset_iterator_next(&iter);
+	} else {
+		ir_node** in;
+		size_t i;
+
+		NEW_ARR_A(ir_node*, in, set_size);
+		for (i = 0; i < set_size; i++) {
+			in[i] = ir_nodeset_iterator_next(&iter);
+		}
+		qsort(in, set_size, sizeof(*in), in_cmp);
+		return new_r_Sync(irg, block, set_size, in);
	}
+}
+
+
+static ir_node** unfinished_phis;
+
+
+/**
+ * Replace one memory Phi by one memory Phi per unique address.
+ *
+ * For every address index j a new Phi over the block's predecessors is
+ * built: each input is the GenerateSync() of the predecessor block's set
+ * for address j.  Predecessors whose set is still empty (not yet walked)
+ * get an Unknown input and the Phi is chained onto unfinished_phis[j]
+ * via the irn link, to be completed by FinalisePhis() later.
+ */
+static void PlaceMemPhis(ir_graph* irg, ir_node* block, ir_node* phi)
+{
+	int unfinished = 0;
+	size_t block_n_preds = get_Block_n_cfgpreds(block);
+	ir_nodeset_t* thissets;
+	ir_node** in;
+	size_t i;
+	size_t j;
	thissets = get_irn_link(block);
-	NEW_ARR_A(ir_node*, in, n);
+	NEW_ARR_A(ir_node*, in, block_n_preds);
	for (j = 0; j < count_addrs; j++) {
		ir_node* new_phi;
-		for (i = 0; i < n; i++) {
-			ir_nodeset_t* predsets = get_irn_link(get_nodes_block(get_Phi_pred(phi, i)));
-			size_t size = ir_nodeset_size(&predsets[j]);
-			ir_nodeset_iterator_t iter;
+		for (i = 0; i < block_n_preds; i++) {
+			ir_node* pred_block = get_nodes_block(get_Phi_pred(phi, i)); // TODO get_Block_cfgpred_block(block, i);
+			ir_nodeset_t* predsets = get_irn_link(pred_block);
+			size_t predset_size = ir_nodeset_size(&predsets[j]);
-			ir_nodeset_iterator_init(&iter, &predsets[j]);
-			if (size == 0) {
-				UNIMPLEMENTED
-			} else if (size == 1) {
-				in[i] = ir_nodeset_iterator_next(&iter);
+			if (predset_size == 0) {
+				in[i] = new_r_Unknown(irg, mode_M);
+				unfinished = 1;
			} else {
-				ir_node** sync_in;
-				size_t k;
-
-				NEW_ARR_A(ir_node*, sync_in, size);
-				for (k = 0; k < size; k++) {
-					sync_in[k] = ir_nodeset_iterator_next(&iter);
-				}
-				in[i] = new_r_Sync(irg, get_Block_cfgpred_block(block, i), size, sync_in);
+				in[i] = GenerateSync(irg, pred_block, &predsets[j]);
			}
		}
-		new_phi = new_r_Phi(irg, block, n, in, mode_M);
+		new_phi = new_r_Phi(irg, block, block_n_preds, in, mode_M);
+		if (unfinished) {
+			set_irn_link(new_phi, unfinished_phis[j]);
+			unfinished_phis[j] = new_phi;
+		}
		ir_nodeset_insert(&thissets[j], new_phi);
	}
+}
+
+
+static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block);
+
+
+/**
+ * Handle a memory Phi during the memory walk: first recurse into the
+ * memory chains of all Phi predecessors, then build the per-address
+ * replacement phis (PlaceMemPhis) and remove the original Phi by
+ * exchanging it with Bad.
+ */
+static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
+{
+	size_t n = get_Phi_n_preds(phi);
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		WalkMem(irg, get_Phi_pred(phi, i), block);
+	}
+	PlaceMemPhis(irg, block, phi);
	exchange(phi, new_Bad());
}
static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
{
ir_node* addr = get_Load_ptr(load);
- size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
+ size_t addr_idx = (size_t)get_irn_link(addr);
ir_nodeset_t* interfere_sets = get_irn_link(block);
ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
size_t size = ir_nodeset_size(interfere_set);
ir_nodeset_iterator_init(&interfere_iter, interfere_set);
if (size == 1) {
ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
- if (is_Proj(after)) {
- ir_node* pred = get_Proj_pred(after);
- if (is_Load(pred)) {
-#ifdef OPTIMISE_LOAD_AFTER_LOAD
- if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
- exchange(load, pred);
- return;
- }
-#endif
- after = get_Load_mem(pred);
- }
- }
+ assert(!is_Proj(after) || !is_Load(get_Proj_pred(after)));
DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
set_Load_mem(load, after);
} else {
ir_node** after_set;
- ir_node* sync;
+ ir_node* after;
+ ir_node* mem;
+ size_t i;
NEW_ARR_A(ir_node*, after_set, size);
- for (i = 0; i < size; i++) {
- ir_node* mem = ir_nodeset_iterator_next(&interfere_iter);
+ i = 0;
+ while ((mem = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
if (is_Proj(mem)) {
ir_node* pred = get_Proj_pred(mem);
if (is_Load(pred)) {
return;
}
#endif
- mem = get_Load_mem(pred);
+ continue;
}
}
- after_set[i] = mem;
- sync = new_r_Sync(irg, block, size, after_set);
+ DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, mem));
+ after_set[i++] = mem;
+ }
+ assert(i != 0);
+ if (i == 1) {
+ after = after_set[0];
+ } else {
+ after = new_r_Sync(irg, block, i, after_set);
}
- set_Load_mem(load, sync);
+ set_Load_mem(load, after);
}
for (i = 0; i < count_addrs; i++) {
ir_node* other_addr = addrs[i];
ir_mode* other_mode = mode; // XXX second mode is nonsense
ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
- ir_node* other_node;
DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
- if (rel == no_alias) {
+ if (rel == ir_no_alias) {
continue;
}
DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));
- ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
- while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
- if (is_Proj(other_node) && is_Load(get_Proj_pred(other_node))) continue;
- if (AliasTest(irg, addr, mode, other_node) != no_alias) {
- DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], load));
- ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
- }
- }
-
ir_nodeset_insert(&interfere_sets[i], memory);
}
}
static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
{
ir_node* addr = get_Store_ptr(store);
- size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
+ size_t addr_idx = (size_t)get_irn_link(addr);
ir_nodeset_t* interfere_sets = get_irn_link(block);
ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
- size_t size = ir_nodeset_size(interfere_set);
- ir_nodeset_iterator_t interfere_iter;
+ ir_node* after;
size_t i;
- assert(size > 0);
- ir_nodeset_iterator_init(&interfere_iter, interfere_set);
- if (size == 1) {
- ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
- DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", store, after));
- set_Store_mem(store, after);
- } else {
- ir_node** after_set;
- ir_node* sync;
-
- NEW_ARR_A(ir_node*, after_set, size);
- for (i = 0; i < size; i++) {
- after_set[i] = ir_nodeset_iterator_next(&interfere_iter);
- sync = new_r_Sync(irg, block, size, after_set);
- }
- set_Store_mem(store, sync);
- }
+ after = GenerateSync(irg, block, interfere_set);
+ set_Store_mem(store, after);
for (i = 0; i < count_addrs; i++) {
+ ir_nodeset_iterator_t interfere_iter;
ir_mode* mode = get_irn_mode(get_Store_value(store));
ir_node* other_addr = addrs[i];
ir_mode* other_mode = mode; // XXX second mode is nonsense
ir_node* other_node;
DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
- if (rel == no_alias) {
+ if (rel == ir_no_alias) {
continue;
}
DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));
ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
- if (AliasTest(irg, addr, mode, other_node) != no_alias) {
+ if (AliasTest(irg, addr, mode, other_node) != ir_no_alias) {
DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
}
ir_nodeset_t* addr_sets;
if (block != last_block) {
+ DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
block_change = 1;
- if (Block_not_block_visited(block)) {
+ if (!Block_block_visited(block)) {
mark_Block_block_visited(block);
} else {
DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
if (is_Phi(node)) {
WalkMemPhi(irg, block, node);
- return 0;
+ return block_change;
} else if (is_Sync(node)) {
UNIMPLEMENTED
} else if (is_Return(node)) {
if (WalkMem(irg, pred, block)) {
// There was a block change
+ size_t block_arity = get_Block_n_cfgpreds(block);
+
DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node));
- if (get_Block_n_cfgpreds(block) == 1) {
+ if (block_arity == 1) {
// Just one predecessor, inherit its alias sets
- ir_nodeset_t* predsets = get_irn_link(get_nodes_block(pred));
+ ir_node* pred_block = get_nodes_block(pred);
+ ir_nodeset_t* predsets = get_irn_link(pred_block);
ir_nodeset_t* thissets = get_irn_link(block);
size_t i;
DB((dbg, LEVEL_3, "===> Copying the only predecessor's address sets\n"));
- for (i = 0; i < count_addrs; i++) {
- ir_nodeset_iterator_t prediter;
- ir_node* addr;
+ if (ir_nodeset_size(&predsets[0]) == 0) {
+ ir_node* unknown;
+
+ DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
+ assert(Block_block_visited(pred_block));
- ir_nodeset_iterator_init(&prediter, &predsets[i]);
- while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
- ir_nodeset_insert(&thissets[i], addr);
+ unknown = new_r_Unknown(irg, mode_M);
+ for (i = 0; i < count_addrs; i++) {
+ ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
+ DB((dbg, LEVEL_3, "===> Placing unfinished %+F for %+F in %+F\n", phi_unk, addrs[i], block));
+ set_irn_link(phi_unk, unfinished_phis[i]);
+ unfinished_phis[i] = phi_unk;
+ ir_nodeset_insert(&thissets[i], phi_unk);
+ }
+ } else {
+ for (i = 0; i < count_addrs; i++) {
+ ir_nodeset_iterator_t prediter;
+ ir_node* addr;
+
+ ir_nodeset_iterator_init(&prediter, &predsets[i]);
+ while ((addr = ir_nodeset_iterator_next(&prediter)) != NULL) {
+ ir_nodeset_insert(&thissets[i], addr);
+ }
}
}
}
} else {
ir_nodeset_t sync_set;
size_t i;
- size_t sync_arity;
- ir_nodeset_iterator_t sync_set_iter;
+ ir_node* after;
DB((dbg, LEVEL_3, "===> Fallback: %+F aliases everything\n", node));
}
}
- assert(is_Return(node)); // XXX extend to other node types
-
- sync_arity = ir_nodeset_size(&sync_set);
- ir_nodeset_iterator_init(&sync_set_iter, &sync_set);
- if (sync_arity == 1) {
- set_Return_mem(node, ir_nodeset_iterator_next(&sync_set_iter));
- } else {
- ir_node** sync_in;
- ir_node* sync;
-
- NEW_ARR_A(ir_node*, sync_in, sync_arity);
- for (i = 0; i < sync_arity; i++) {
- sync_in[i] = ir_nodeset_iterator_next(&sync_set_iter);
- }
- sync = new_r_Sync(irg, block, sync_arity, sync_in);
- set_Return_mem(node, sync);
- }
+ after = GenerateSync(irg, block, &sync_set);
+ set_irn_n(node, 0, after); // XXX unnice way to set the memory input
for (i = 0; i < count_addrs; i++) {
ir_nodeset_iterator_t iter;
}
+/**
+ * Complete the phis queued on unfinished_phis[] after the memory walk.
+ *
+ * Each queued phi (chained via its irn link) still has Unknown inputs for
+ * predecessor blocks whose address sets were empty when the phi was built.
+ * Single-predecessor phis are replaced wholesale by the predecessor's
+ * GenerateSync(); otherwise only the Unknown inputs are patched.
+ */
+static void FinalisePhis(ir_graph* irg)
+{
+	size_t i;
+
+	for (i = 0; i < count_addrs; i++) {
+		ir_node* next_phi;
+		ir_node* phi;
+
+		for (phi = unfinished_phis[i]; phi != NULL; phi = next_phi) {
+			ir_node* block = get_nodes_block(phi);
+			size_t block_n_preds = get_Block_n_cfgpreds(block);
+
+			next_phi = get_irn_link(phi);
+
+			DB((dbg, LEVEL_4, "===> Finalising phi %+F in %+F\n", phi, block));
+
+			if (block_n_preds == 1) {
+				ir_node* pred_block = get_Block_cfgpred_block(block, 0);
+				ir_nodeset_t* pred_sets = get_irn_link(pred_block);
+				ir_node* after = GenerateSync(irg, pred_block, &pred_sets[i]);
+
+				assert(is_Unknown(get_Phi_pred(phi, 0)));
+				exchange(phi, after);
+			} else {
+				ir_node** in;
+				size_t j;
+
+				NEW_ARR_A(ir_node*, in, block_n_preds);
+				for (j = 0; j < block_n_preds; j++) {
+					ir_node* pred_block = get_Block_cfgpred_block(block, j);
+					ir_nodeset_t* pred_sets = get_irn_link(pred_block);
+
+					if (is_Unknown(get_Phi_pred(phi, j))) {
+						set_Phi_pred(phi, j, GenerateSync(irg, pred_block, &pred_sets[i]));
+					}
+				}
+			}
+		}
+	}
+}
+
+
+/**
+ * Entry point of the old rewrite pass: walk the memory chain backwards
+ * from every Return predecessor of the end block, then complete the
+ * phis that could not be finished during the walk.
+ */
static void Detotalise(ir_graph* irg)
{
	ir_node* end_block = get_irg_end_block(irg);
	size_t npreds = get_Block_n_cfgpreds(end_block);
	size_t i;
+	/* One list head of unfinished phis per unique address.
+	 * Fixed: XMALLOCN(ir_node, ...) allocated count_addrs ir_node structs
+	 * and returned ir_node*, but unfinished_phis is ir_node** — the element
+	 * type must be the pointer type. */
+	unfinished_phis = XMALLOCN(ir_node*, count_addrs);
+	for (i = 0; i < count_addrs; i++) {
+		unfinished_phis[i] = NULL;
+	}
+
	for (i = 0; i < npreds; i++) {
		ir_node* pred = get_Block_cfgpred(end_block, i);
		assert(is_Return(pred));
		DB((dbg, LEVEL_2, "===> Starting memory walk at %+F\n", pred));
		WalkMem(irg, pred, NULL);
	}
+
+	FinalisePhis(irg);
+	xfree(unfinished_phis);
}
+#endif
+#if 0
static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
{
size_t n = get_Sync_n_preds(sync);
}
}
-
static void NormaliseSync(ir_node* node, void* env)
{
ir_nodeset_t preds;
ir_node** in;
size_t count_preds;
size_t i;
+ (void) env;
if (!is_Sync(node)) return;
AddSyncPreds(&preds, node);
count_preds = ir_nodeset_size(&preds);
- if (count_preds != get_Sync_n_preds(node)) {
+ if (count_preds != (unsigned)get_Sync_n_preds(node)) {
NEW_ARR_A(ir_node*, in, count_preds);
ir_nodeset_iterator_init(&iter, &preds);
for (i = 0; i < count_preds; i++) {
ir_nodeset_destroy(&preds);
}
-
void opt_ldst2(ir_graph* irg)
{
FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
normalize_one_return(irg);
+ dump_ir_block_graph(irg, "-prefluffig");
obstack_init(&obst);
inc_irg_block_visited(irg);
SetStartAddressesTop(irg);
Detotalise(irg);
+ dump_ir_block_graph(irg, "-fluffig");
irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
obstack_free(&obst, NULL);
irg_walk_graph(irg, NormaliseSync, NULL, NULL);
optimize_graph_df(irg);
irg_walk_graph(irg, NormaliseSync, NULL, NULL);
+ dump_ir_block_graph(irg, "-postfluffig");
+}
+#endif
+
+
+/**
+ * State for walking the memory chain of one Load/Store (the "origin")
+ * to find memory operations it may execute in parallel with.
+ */
+typedef struct parallelise_info
+{
+	ir_node *origin_block; /**< block of the originating memory op */
+	ir_node *origin_ptr;   /**< address accessed by the originating op */
+	ir_mode *origin_mode;  /**< mode of the value accessed by the origin */
+	ir_nodeset_t this_mem; /**< nodes the origin's memory input must depend on */
+	ir_nodeset_t user_mem; /**< memory Projs independent of the origin */
+} parallelise_info;
+
+
+/**
+ * Collect the memory dependencies of a Load.
+ *
+ * Recursively walks the memory chain starting at irn.  Within the origin
+ * block, memory Projs of non-volatile Loads are always independent (two
+ * reads never conflict) and memory Projs of non-volatile Stores are
+ * independent when get_alias_relation() proves their address does not
+ * alias the origin's address; those land in pi->user_mem and the walk
+ * continues past them.  Syncs are expanded into all their predecessors.
+ * Any other node terminates the walk and is recorded in pi->this_mem.
+ */
+static void parallelise_load(parallelise_info *pi, ir_node *irn)
+{
+	/* There is no point in investigating the same subgraph twice */
+	if (ir_nodeset_contains(&pi->user_mem, irn))
+		return;
+
+	//ir_fprintf(stderr, "considering %+F\n", irn);
+	if (get_nodes_block(irn) == pi->origin_block) {
+		if (is_Proj(irn)) {
+			ir_node *pred = get_Proj_pred(irn);
+			if (is_Load(pred) &&
+					get_Load_volatility(pred) == volatility_non_volatile) {
+				ir_node *mem = get_Load_mem(pred);
+				//ir_nodeset_insert(&pi->this_mem, mem);
+				ir_nodeset_insert(&pi->user_mem, irn);
+				//ir_fprintf(stderr, "adding %+F to user set\n", irn);
+				parallelise_load(pi, mem);
+				return;
+			} else if (is_Store(pred) &&
+					get_Store_volatility(pred) == volatility_non_volatile) {
+				ir_mode *org_mode = pi->origin_mode;
+				ir_node *org_ptr = pi->origin_ptr;
+				ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
+				ir_node *store_ptr = get_Store_ptr(pred);
+				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
+					ir_node *mem = get_Store_mem(pred);
+					//ir_fprintf(stderr, "Ld after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode);
+					ir_nodeset_insert(&pi->user_mem, irn);
+					//ir_fprintf(stderr, "adding %+F to user set\n", irn);
+					parallelise_load(pi, mem);
+					return;
+				}
+			}
+		} else if (is_Sync(irn)) {
+			int n = get_Sync_n_preds(irn);
+			int i;
+
+			for (i = 0; i < n; ++i) {
+				ir_node *sync_pred = get_Sync_pred(irn, i);
+				parallelise_load(pi, sync_pred);
+			}
+			return;
+		}
+	}
+	ir_nodeset_insert(&pi->this_mem, irn);
+	//ir_fprintf(stderr, "adding %+F to this set\n", irn);
+}
+
+
+/**
+ * Collect the memory dependencies of a Store.
+ *
+ * Same walk as parallelise_load(), but since the origin writes memory,
+ * BOTH non-volatile Loads and non-volatile Stores on the chain require a
+ * get_alias_relation() proof of ir_no_alias before their memory Proj is
+ * treated as independent (added to pi->user_mem) and skipped over.
+ * Syncs are expanded into their predecessors; any other node ends the
+ * walk and is recorded in pi->this_mem.
+ */
+static void parallelise_store(parallelise_info *pi, ir_node *irn)
+{
+	/* There is no point in investigating the same subgraph twice */
+	if (ir_nodeset_contains(&pi->user_mem, irn))
+		return;
+
+	//ir_fprintf(stderr, "considering %+F\n", irn);
+	if (get_nodes_block(irn) == pi->origin_block) {
+		if (is_Proj(irn)) {
+			ir_node *pred = get_Proj_pred(irn);
+			if (is_Load(pred) &&
+					get_Load_volatility(pred) == volatility_non_volatile) {
+				ir_mode *org_mode = pi->origin_mode;
+				ir_node *org_ptr = pi->origin_ptr;
+				ir_mode *load_mode = get_Load_mode(pred);
+				ir_node *load_ptr = get_Load_ptr(pred);
+				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
+					ir_node *mem = get_Load_mem(pred);
+					//ir_fprintf(stderr, "St after Ld: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, load_ptr, load_mode);
+					ir_nodeset_insert(&pi->user_mem, irn);
+					//ir_fprintf(stderr, "adding %+F to user set\n", irn);
+					parallelise_store(pi, mem);
+					return;
+				}
+			} else if (is_Store(pred) &&
+					get_Store_volatility(pred) == volatility_non_volatile) {
+				ir_mode *org_mode = pi->origin_mode;
+				ir_node *org_ptr = pi->origin_ptr;
+				ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
+				ir_node *store_ptr = get_Store_ptr(pred);
+				if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
+					ir_node *mem;
+
+					//ir_fprintf(stderr, "St after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode);
+					ir_nodeset_insert(&pi->user_mem, irn);
+					//ir_fprintf(stderr, "adding %+F to user set\n", irn);
+					mem = get_Store_mem(pred);
+					parallelise_store(pi, mem);
+					return;
+				}
+			}
+		} else if (is_Sync(irn)) {
+			int n = get_Sync_n_preds(irn);
+			int i;
+
+			for (i = 0; i < n; ++i) {
+				ir_node *sync_pred = get_Sync_pred(irn, i);
+				parallelise_store(pi, sync_pred);
+			}
+			return;
+		}
+	}
+	ir_nodeset_insert(&pi->this_mem, irn);
+	//ir_fprintf(stderr, "adding %+F to this set\n", irn);
+}
+
+
+/**
+ * Graph walker callback: parallelise the memory chain of one mode_M Proj.
+ *
+ * For the memory Proj of a non-volatile Load/Store, the walk separates the
+ * real memory dependencies (pi.this_mem) from independent users found on
+ * the chain (pi.user_mem).  If any independent users exist, the Proj is
+ * replaced by a Sync of {fresh Proj of the mem op} ∪ user_mem, and the mem
+ * op's memory input is replaced by a Sync of this_mem, removing the false
+ * serialisation.
+ *
+ * Fixed: `pi.origin_block = block,` used a comma operator where a
+ * semicolon was intended (twice); behaviour was preserved only by
+ * accident of comma-expression evaluation order.
+ */
+static void walker(ir_node *proj, void *env)
+{
+	ir_node *mem_op;
+	ir_node *pred;
+	ir_node *block;
+	int n;
+	parallelise_info pi;
+
+	(void)env;
+
+	if (!is_Proj(proj)) return;
+	if (get_irn_mode(proj) != mode_M) return;
+
+	mem_op = get_Proj_pred(proj);
+	if (is_Load(mem_op)) {
+		if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
+
+		block = get_nodes_block(mem_op);
+		pred = get_Load_mem(mem_op);
+		//ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);
+
+		pi.origin_block = block;
+		pi.origin_ptr = get_Load_ptr(mem_op);
+		pi.origin_mode = get_Load_mode(mem_op);
+		ir_nodeset_init(&pi.this_mem);
+		ir_nodeset_init(&pi.user_mem);
+
+		parallelise_load(&pi, pred);
+	} else if (is_Store(mem_op)) {
+		if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
+
+		block = get_nodes_block(mem_op);
+		pred = get_Store_mem(mem_op);
+		//ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);
+
+		pi.origin_block = block;
+		pi.origin_ptr = get_Store_ptr(mem_op);
+		pi.origin_mode = get_irn_mode(get_Store_value(mem_op));
+		ir_nodeset_init(&pi.this_mem);
+		ir_nodeset_init(&pi.user_mem);
+
+		parallelise_store(&pi, pred);
+	} else {
+		return;
+	}
+
+	n = ir_nodeset_size(&pi.user_mem);
+	if (n != 0) { /* nothing happened otherwise */
+		ir_graph *irg = current_ir_graph;
+		ir_node *sync;
+		ir_node **in;
+		ir_nodeset_iterator_t iter;
+		int i;
+
+		++n; /* one extra slot for the fresh Proj of the mem op */
+		//ir_fprintf(stderr, "creating sync for users of %+F with %d inputs\n", proj, n);
+		NEW_ARR_A(ir_node*, in, n);
+		i = 0;
+		in[i++] = new_r_Unknown(irg, mode_M); /* placeholder, patched below */
+		ir_nodeset_iterator_init(&iter, &pi.user_mem);
+		for (;;) {
+			ir_node* p = ir_nodeset_iterator_next(&iter);
+			if (p == NULL) break;
+			in[i++] = p;
+		}
+		assert(i == n);
+		sync = new_r_Sync(irg, block, n, in);
+		exchange(proj, sync);
+
+		/* exchange() rerouted all users, including slot 0 of the new Sync;
+		 * give the Sync a fresh Proj of the mem op instead. */
+		assert(pn_Load_M == pn_Store_M);
+		proj = new_r_Proj(irg, block, mem_op, mode_M, pn_Load_M);
+		set_Sync_pred(sync, 0, proj);
+
+		n = ir_nodeset_size(&pi.this_mem);
+		//ir_fprintf(stderr, "creating sync for %+F with %d inputs\n", mem_op, n);
+		ir_nodeset_iterator_init(&iter, &pi.this_mem);
+		if (n == 1) {
+			sync = ir_nodeset_iterator_next(&iter);
+		} else {
+			NEW_ARR_A(ir_node*, in, n);
+			i = 0;
+			for (;;) {
+				ir_node* p = ir_nodeset_iterator_next(&iter);
+				if (p == NULL) break;
+				in[i++] = p;
+			}
+			assert(i == n);
+			sync = new_r_Sync(irg, block, n, in);
+		}
+		set_memop_mem(mem_op, sync);
+	}
+
+	ir_nodeset_destroy(&pi.this_mem);
+	ir_nodeset_destroy(&pi.user_mem);
+}
+
+
+/**
+ * Public entry point: parallelise the memory chains of irg by applying
+ * walker() to every node (it acts only on mode_M Projs of non-volatile
+ * Loads/Stores).
+ */
+void opt_sync(ir_graph *irg)
+{
+	//assure_irg_entity_usage_computed(irg);
+	//assure_irp_globals_entity_usage_computed();
+
+	irg_walk_graph(irg, NULL, walker, NULL);
+	//optimize_graph_df(irg);
+	//irg_walk_graph(irg, NormaliseSync, NULL, NULL);
}