X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Fldst2.c;h=0c72988eb1efc96ab6f0d319873b1984d11784a3;hb=8f530048146e640fdfeb16b8ebe8bc997a1c8abe;hp=20b8109079ae00b78e05b9fe6389a7383eb112c0;hpb=b519dd6a1e6d85e843eff533be787d1f138a07ff;p=libfirm diff --git a/ir/opt/ldst2.c b/ir/opt/ldst2.c index 20b810907..0c72988eb 100644 --- a/ir/opt/ldst2.c +++ b/ir/opt/ldst2.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 1995-2007 University of Karlsruhe. All right reserved. + * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. * * This file is part of libFirm. * @@ -23,13 +23,11 @@ * @author Christoph Mallon * @version $Id: $ */ -#ifdef HAVE_CONFIG_H #include "config.h" -#endif #include "iroptimize.h" -#include "array.h" +#include "array_t.h" #include "debug.h" #include "ircons.h" #include "irgraph.h" @@ -42,7 +40,10 @@ #include "obst.h" #include "irdump.h" #include "irflag_t.h" +#include "irprintf.h" +#include "irtools.h" +#if +0 #define OPTIMISE_LOAD_AFTER_LOAD @@ -155,7 +156,7 @@ static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, } else if (is_Store(other)) { other_addr = get_Store_ptr(other); } else { - return may_alias; + return ir_may_alias; } other_mode = get_irn_mode(other); @@ -163,6 +164,14 @@ static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, } +static int in_cmp(void const* va, void const* vb) +{ + ir_node const* const a = *(ir_node const*const*)va; + ir_node const* const b = *(ir_node const*const*)vb; + return get_irn_idx(a) - get_irn_idx(b); +} + + static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set) { size_t set_size = ir_nodeset_size(after_set); @@ -181,6 +190,7 @@ static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_ for (i = 0; i < set_size; i++) { in[i] = ir_nodeset_iterator_next(&iter); } + qsort(in, set_size, sizeof(*in), in_cmp); return new_r_Sync(irg, block, set_size, in); } } @@ -299,7 +309,7 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode); DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel)); - if (rel == no_alias) { + if (rel == ir_no_alias) { continue; } DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr)); @@ -330,14 +340,14 @@ static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* m ir_node* other_node; DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel)); - if (rel == no_alias) { + if (rel == ir_no_alias) { continue; } DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr)); ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]); while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) { - if (AliasTest(irg, addr, mode, other_node) != no_alias) { + if (AliasTest(irg, addr, mode, other_node) != ir_no_alias) { DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store)); ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter); } @@ -359,7 +369,7 @@ static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block) if (block != last_block) { DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block)); block_change = 1; - if (Block_not_block_visited(block)) { + if (!Block_block_visited(block)) { mark_Block_block_visited(block); } else { DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node)); @@ -399,7 +409,7 @@ static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block) ir_node* unknown; DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n")); - assert(!Block_not_block_visited(pred_block)); + assert(Block_block_visited(pred_block)); unknown = new_r_Unknown(irg, mode_M); for (i = 0; i < count_addrs; i++) { @@ -514,7 +524,7 @@ static void Detotalise(ir_graph* irg) size_t npreds = get_Block_n_cfgpreds(end_block); size_t i; - unfinished_phis = xmalloc(sizeof(*unfinished_phis) * count_addrs); + unfinished_phis = XMALLOCN(ir_node, count_addrs); for (i = 0; i < count_addrs; i++) { unfinished_phis[i] = NULL; } @@ -529,8 +539,10 @@ static void Detotalise(ir_graph* irg) FinalisePhis(irg); xfree(unfinished_phis); } +#endif +#if 0 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync) { size_t n = get_Sync_n_preds(sync); @@ -546,7 +558,6 @@ static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync) } } - static void NormaliseSync(ir_node* node, void* env) { ir_nodeset_t preds; @@ -576,7 +587,6 @@ static void NormaliseSync(ir_node* node, void* env) ir_nodeset_destroy(&preds); } - void opt_ldst2(ir_graph* irg) { FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2"); @@ -605,8 +615,239 @@ void opt_ldst2(ir_graph* irg) irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL); obstack_free(&obst, NULL); + normalize_proj_nodes(irg); irg_walk_graph(irg, NormaliseSync, NULL, NULL); - optimize_graph_df(irg); + optimize_graph_df(irg); irg_walk_graph(irg, NormaliseSync, NULL, NULL); dump_ir_block_graph(irg, "-postfluffig"); } +#endif + + +typedef struct parallelise_info +{ + ir_node *origin_block; + ir_node *origin_ptr; + ir_mode *origin_mode; + ir_nodeset_t this_mem; + ir_nodeset_t user_mem; +} parallelise_info; + + +static void parallelise_load(parallelise_info *pi, ir_node *irn) +{ + /* There is no point in investigating the same subgraph twice */ + if (ir_nodeset_contains(&pi->user_mem, irn)) + return; + + //ir_fprintf(stderr, "considering %+F\n", irn); + if (get_nodes_block(irn) == pi->origin_block) { + if (is_Proj(irn)) { + ir_node *pred = get_Proj_pred(irn); + if (is_Load(pred) && + get_Load_volatility(pred) == volatility_non_volatile) { + ir_node *mem = get_Load_mem(pred); + //ir_nodeset_insert(&pi->this_mem, mem); + ir_nodeset_insert(&pi->user_mem, irn); + //ir_fprintf(stderr, "adding %+F to user set\n", irn); + parallelise_load(pi, mem); + return; + } else if (is_Store(pred) && + get_Store_volatility(pred) == volatility_non_volatile) { + ir_mode *org_mode = pi->origin_mode; + ir_node *org_ptr = pi->origin_ptr; + ir_mode *store_mode = get_irn_mode(get_Store_value(pred)); + ir_node *store_ptr = get_Store_ptr(pred); + if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) { + ir_node *mem = get_Store_mem(pred); + //ir_fprintf(stderr, "Ld after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode); + ir_nodeset_insert(&pi->user_mem, irn); + //ir_fprintf(stderr, "adding %+F to user set\n", irn); + parallelise_load(pi, mem); + return; + } + } + } else if (is_Sync(irn)) { + int n = get_Sync_n_preds(irn); + int i; + + for (i = 0; i < n; ++i) { + ir_node *sync_pred = get_Sync_pred(irn, i); + parallelise_load(pi, sync_pred); + } + return; + } + } + ir_nodeset_insert(&pi->this_mem, irn); + //ir_fprintf(stderr, "adding %+F to this set\n", irn); +} + + +static void parallelise_store(parallelise_info *pi, ir_node *irn) +{ + /* There is no point in investigating the same subgraph twice */ + if (ir_nodeset_contains(&pi->user_mem, irn)) + return; + + //ir_fprintf(stderr, "considering %+F\n", irn); + if (get_nodes_block(irn) == pi->origin_block) { + if (is_Proj(irn)) { + ir_node *pred = get_Proj_pred(irn); + if (is_Load(pred) && + get_Load_volatility(pred) == volatility_non_volatile) { + ir_mode *org_mode = pi->origin_mode; + ir_node *org_ptr = pi->origin_ptr; + ir_mode *load_mode = get_Load_mode(pred); + ir_node *load_ptr = get_Load_ptr(pred); + if (get_alias_relation(current_ir_graph, org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) { + ir_node *mem = get_Load_mem(pred); + //ir_fprintf(stderr, "St after Ld: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, load_ptr, load_mode); + ir_nodeset_insert(&pi->user_mem, irn); + //ir_fprintf(stderr, "adding %+F to user set\n", irn); + parallelise_store(pi, mem); + return; + } + } else if (is_Store(pred) && + get_Store_volatility(pred) == volatility_non_volatile) { + ir_mode *org_mode = pi->origin_mode; + ir_node *org_ptr = pi->origin_ptr; + ir_mode *store_mode = get_irn_mode(get_Store_value(pred)); + ir_node *store_ptr = get_Store_ptr(pred); + if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) { + ir_node *mem; + + //ir_fprintf(stderr, "St after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode); + ir_nodeset_insert(&pi->user_mem, irn); + //ir_fprintf(stderr, "adding %+F to user set\n", irn); + mem = get_Store_mem(pred); + parallelise_store(pi, mem); + return; + } + } + } else if (is_Sync(irn)) { + int n = get_Sync_n_preds(irn); + int i; + + for (i = 0; i < n; ++i) { + ir_node *sync_pred = get_Sync_pred(irn, i); + parallelise_store(pi, sync_pred); + } + return; + } + } + ir_nodeset_insert(&pi->this_mem, irn); + //ir_fprintf(stderr, "adding %+F to this set\n", irn); +} + + +static void walker(ir_node *proj, void *env) +{ + ir_node *mem_op; + ir_node *pred; + ir_node *block; + int n; + parallelise_info pi; + + (void)env; + + if (!is_Proj(proj)) return; + if (get_irn_mode(proj) != mode_M) return; + + mem_op = get_Proj_pred(proj); + if (is_Load(mem_op)) { + if (get_Load_volatility(mem_op) != volatility_non_volatile) return; + + block = get_nodes_block(mem_op); + pred = get_Load_mem(mem_op); + //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj); + + pi.origin_block = block, + pi.origin_ptr = get_Load_ptr(mem_op); + pi.origin_mode = get_Load_mode(mem_op); + ir_nodeset_init(&pi.this_mem); + ir_nodeset_init(&pi.user_mem); + + parallelise_load(&pi, pred); + } else if (is_Store(mem_op)) { + if (get_Store_volatility(mem_op) != volatility_non_volatile) return; + + block = get_nodes_block(mem_op); + pred = get_Store_mem(mem_op); + //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj); + + pi.origin_block = block, + pi.origin_ptr = get_Store_ptr(mem_op); + pi.origin_mode = get_irn_mode(get_Store_value(mem_op)); + ir_nodeset_init(&pi.this_mem); + ir_nodeset_init(&pi.user_mem); + + parallelise_store(&pi, pred); + } else { + return; + } + + n = ir_nodeset_size(&pi.user_mem); + if (n != 0) { /* nothing happened otherwise */ + ir_graph *irg = current_ir_graph; + ir_node *sync; + ir_node **in; + ir_nodeset_iterator_t iter; + int i; + + ++n; + //ir_fprintf(stderr, "creating sync for users of %+F with %d inputs\n", proj, n); + NEW_ARR_A(ir_node*, in, n); + i = 0; + in[i++] = new_r_Unknown(irg, mode_M); + ir_nodeset_iterator_init(&iter, &pi.user_mem); + for (;;) { + ir_node* p = ir_nodeset_iterator_next(&iter); + if (p == NULL) break; + in[i++] = p; + } + assert(i == n); + sync = new_r_Sync(block, n, in); + exchange(proj, sync); + + assert(pn_Load_M == pn_Store_M); + proj = new_r_Proj(block, mem_op, mode_M, pn_Load_M); + set_Sync_pred(sync, 0, proj); + + n = ir_nodeset_size(&pi.this_mem); + //ir_fprintf(stderr, "creating sync for %+F with %d inputs\n", mem_op, n); + ir_nodeset_iterator_init(&iter, &pi.this_mem); + if (n == 1) { + sync = ir_nodeset_iterator_next(&iter); + } else { + NEW_ARR_A(ir_node*, in, n); + i = 0; + for (;;) { + ir_node* p = ir_nodeset_iterator_next(&iter); + if (p == NULL) break; + in[i++] = p; + } + assert(i == n); + sync = new_r_Sync(block, n, in); + } + set_memop_mem(mem_op, sync); + } + + ir_nodeset_destroy(&pi.this_mem); + ir_nodeset_destroy(&pi.user_mem); +} + + +void opt_sync(ir_graph *irg) +{ + //assure_irg_entity_usage_computed(irg); + //assure_irp_globals_entity_usage_computed(); + + irg_walk_graph(irg, NULL, walker, NULL); + //optimize_graph_df(irg); + //irg_walk_graph(irg, NormaliseSync, NULL, NULL); +} + +ir_graph_pass_t *opt_sync_pass(const char *name, int verify, int dump) +{ + return def_graph_pass(name ? name : "opt_sync", verify, dump, opt_sync); +}