X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Fldst2.c;h=4184e305b8250444dbaba07734534a9d035225cc;hb=188a17803798f8d78b1c02c3b68976056bce33d9;hp=b43efc31236c91309268de5c786f70ffcd675d39;hpb=a9a30bfe3246ea9c3d6520f849989ae997918892;p=libfirm diff --git a/ir/opt/ldst2.c b/ir/opt/ldst2.c index b43efc312..4184e305b 100644 --- a/ir/opt/ldst2.c +++ b/ir/opt/ldst2.c @@ -1,5 +1,33 @@ -#include -#include "array.h" +/* + * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved. + * + * This file is part of libFirm. + * + * This file may be distributed and/or modified under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation and appearing in the file LICENSE.GPL included in the + * packaging of this file. + * + * Licensees holding valid libFirm Professional Edition licenses may use + * this file in accordance with the libFirm Commercial License. + * Agreement provided with the Software. + * + * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE + * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE. + */ + +/** + * @file + * @brief parallelizing Load/Store optimisation + * @author Christoph Mallon + * @version $Id: $ + */ +#include "config.h" + +#include "iroptimize.h" + +#include "array_t.h" #include "debug.h" #include "ircons.h" #include "irgraph.h" @@ -9,11 +37,12 @@ #include "irmemory.h" #include "irnode.h" #include "irnodeset.h" -#include "ldst2.h" #include "obst.h" -#include "return.h" - +#include "irdump.h" +#include "irflag_t.h" +#include "irprintf.h" +#if +0 #define OPTIMISE_LOAD_AFTER_LOAD @@ -53,7 +82,7 @@ static void CollectAddresses(ir_graph* irg) irg_walk_graph(irg, AddressCollector, NULL, &addrs_set); count_addrs = ir_nodeset_size(&addrs_set); - DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (uint)count_addrs)); + DB((dbg, LEVEL_1, "===> %+F uses %u unique addresses\n", irg, (unsigned int)count_addrs)); if (count_addrs != 0) { ir_nodeset_iterator_t addr_iter; size_t i; @@ -63,7 +92,7 @@ static void CollectAddresses(ir_graph* irg) for (i = 0; i < count_addrs; i++) { ir_node* addr = ir_nodeset_iterator_next(&addr_iter); assert(addr != NULL); - set_irn_link(addr, (void*)(uintptr_t)i); + set_irn_link(addr, (void *)i); addrs[i] = addr; DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr)); } @@ -75,6 +104,7 @@ static void AliasSetAdder(ir_node* block, void* env) { ir_nodeset_t* alias_set; size_t i; + (void) env; alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs); for (i = 0; i < count_addrs; i++) { @@ -105,6 +135,7 @@ static void AliasSetDestroyer(ir_node* block, void* env) { ir_nodeset_t* alias_set = get_irn_link(block); size_t i; + (void) env; for (i = 0; i < count_addrs; i++) { ir_nodeset_destroy(&alias_set[i]); @@ -124,7 +155,7 @@ static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, } else if (is_Store(other)) { other_addr = get_Store_ptr(other); } else { - return may_alias; + return ir_may_alias; } other_mode = get_irn_mode(other); @@ -132,6 +163,14 @@ static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode, } +static int in_cmp(void const* va, void const* vb) +{ + ir_node const* const a = *(ir_node const*const*)va; + ir_node const* const b = *(ir_node const*const*)vb; + return get_irn_idx(a) - get_irn_idx(b); +} + + static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set) { size_t set_size = ir_nodeset_size(after_set); @@ -150,6 +189,7 @@ static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_ for (i = 0; i < set_size; i++) { in[i] = ir_nodeset_iterator_next(&iter); } + qsort(in, set_size, sizeof(*in), in_cmp); return new_r_Sync(irg, block, set_size, in); } } @@ -214,7 +254,7 @@ static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi) static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory) { ir_node* addr = get_Load_ptr(load); - size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr); + size_t addr_idx = (size_t)get_irn_link(addr); ir_nodeset_t* interfere_sets = get_irn_link(block); ir_nodeset_t* interfere_set = &interfere_sets[addr_idx]; size_t size = ir_nodeset_size(interfere_set); @@ -225,27 +265,18 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem ir_nodeset_iterator_init(&interfere_iter, interfere_set); if (size == 1) { ir_node* after = ir_nodeset_iterator_next(&interfere_iter); - if (is_Proj(after)) { - ir_node* pred = get_Proj_pred(after); - if (is_Load(pred)) { -#ifdef OPTIMISE_LOAD_AFTER_LOAD - if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) { - exchange(load, pred); - return; - } -#endif - after = get_Load_mem(pred); - } - } + assert(!is_Proj(after) || !is_Load(get_Proj_pred(after))); DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after)); set_Load_mem(load, after); } else { ir_node** after_set; - ir_node* sync; + ir_node* after; + ir_node* mem; + size_t i; NEW_ARR_A(ir_node*, after_set, size); - for (i = 0; i < size; i++) { - ir_node* mem = ir_nodeset_iterator_next(&interfere_iter); + i = 0; + while ((mem = ir_nodeset_iterator_next(&interfere_iter)) != NULL) { if (is_Proj(mem)) { ir_node* pred = get_Proj_pred(mem); if (is_Load(pred)) { @@ -255,13 +286,19 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem return; } #endif - mem = get_Load_mem(pred); + continue; } } - after_set[i] = mem; - sync = new_r_Sync(irg, block, size, after_set); + DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, mem)); + after_set[i++] = mem; } - set_Load_mem(load, sync); + assert(i != 0); + if (i == 1) { + after = after_set[0]; + } else { + after = new_r_Sync(irg, block, i, after_set); + } + set_Load_mem(load, after); } for (i = 0; i < count_addrs; i++) { @@ -269,23 +306,13 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem ir_node* other_addr = addrs[i]; ir_mode* other_mode = mode; // XXX second mode is nonsense ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode); - ir_node* other_node; DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel)); - if (rel == no_alias) { + if (rel == ir_no_alias) { continue; } DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr)); - ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]); - while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) { - if (is_Proj(other_node) && is_Load(get_Proj_pred(other_node))) continue; - if (AliasTest(irg, addr, mode, other_node) != no_alias) { - DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], load)); - ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter); - } - } - ir_nodeset_insert(&interfere_sets[i], memory); } } @@ -294,7 +321,7 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory) { ir_node* addr = get_Store_ptr(store); - size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr); + size_t addr_idx = (size_t)get_irn_link(addr); ir_nodeset_t* interfere_sets = get_irn_link(block); ir_nodeset_t* interfere_set = &interfere_sets[addr_idx]; ir_node* after; @@ -312,14 +339,14 @@ static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* m ir_node* other_node; DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel)); - if (rel == no_alias) { + if (rel == ir_no_alias) { continue; } DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr)); ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]); while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) { - if (AliasTest(irg, addr, mode, other_node) != no_alias) { + if (AliasTest(irg, addr, mode, other_node) != ir_no_alias) { DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store)); ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter); } @@ -341,7 +368,7 @@ static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block) if (block != last_block) { DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block)); block_change = 1; - if (Block_not_block_visited(block)) { + if (!Block_block_visited(block)) { mark_Block_block_visited(block); } else { DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node)); @@ -365,8 +392,9 @@ static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block) if (WalkMem(irg, pred, block)) { // There was a block change - DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node)); size_t block_arity = get_Block_n_cfgpreds(block); + + DB((dbg, LEVEL_3, "===> There is a block change before %+F\n", node)); if (block_arity == 1) { // Just one predecessor, inherit its alias sets ir_node* pred_block = get_nodes_block(pred); @@ -380,11 +408,12 @@ static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block) ir_node* unknown; DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n")); - assert(!Block_not_block_visited(pred_block)); + assert(Block_block_visited(pred_block)); unknown = new_r_Unknown(irg, mode_M); for (i = 0; i < count_addrs; i++) { ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M); + DB((dbg, LEVEL_3, "===> Placing unfinished %+F for %+F in %+F\n", phi_unk, addrs[i], block)); set_irn_link(phi_unk, unfinished_phis[i]); unfinished_phis[i] = phi_unk; ir_nodeset_insert(&thissets[i], phi_unk); @@ -494,7 +523,7 @@ static void Detotalise(ir_graph* irg) size_t npreds = get_Block_n_cfgpreds(end_block); size_t i; - unfinished_phis = xmalloc(sizeof(*unfinished_phis) * count_addrs); + unfinished_phis = XMALLOCN(ir_node, count_addrs); for (i = 0; i < count_addrs; i++) { unfinished_phis[i] = NULL; } @@ -509,6 +538,7 @@ static void Detotalise(ir_graph* irg) FinalisePhis(irg); xfree(unfinished_phis); } +#endif static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync) @@ -526,7 +556,7 @@ static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync) } } - +#if 0 static void NormaliseSync(ir_node* node, void* env) { ir_nodeset_t preds; @@ -534,6 +564,7 @@ static void NormaliseSync(ir_node* node, void* env) ir_node** in; size_t count_preds; size_t i; + (void) env; if (!is_Sync(node)) return; @@ -541,7 +572,7 @@ static void NormaliseSync(ir_node* node, void* env) AddSyncPreds(&preds, node); count_preds = ir_nodeset_size(&preds); - if (count_preds != get_Sync_n_preds(node)) { + if (count_preds != (unsigned)get_Sync_n_preds(node)) { NEW_ARR_A(ir_node*, in, count_preds); ir_nodeset_iterator_init(&iter, &preds); for (i = 0; i < count_preds; i++) { @@ -555,13 +586,13 @@ static void NormaliseSync(ir_node* node, void* env) ir_nodeset_destroy(&preds); } - void opt_ldst2(ir_graph* irg) { FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2"); DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg)); normalize_one_return(irg); + dump_ir_block_graph(irg, "-prefluffig"); obstack_init(&obst); @@ -578,7 +609,6 @@ void opt_ldst2(ir_graph* irg) inc_irg_block_visited(irg); SetStartAddressesTop(irg); Detotalise(irg); - dump_ir_block_graph(irg, "-fluffig"); irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL); @@ -588,4 +618,230 @@ void opt_ldst2(ir_graph* irg) irg_walk_graph(irg, NormaliseSync, NULL, NULL); optimize_graph_df(irg); irg_walk_graph(irg, NormaliseSync, NULL, NULL); + dump_ir_block_graph(irg, "-postfluffig"); +} +#endif + + +typedef struct parallelise_info +{ + ir_node *origin_block; + ir_node *origin_ptr; + ir_mode *origin_mode; + ir_nodeset_t this_mem; + ir_nodeset_t user_mem; +} parallelise_info; + + +static void parallelise_load(parallelise_info *pi, ir_node *irn) +{ + /* There is no point in investigating the same subgraph twice */ + if (ir_nodeset_contains(&pi->user_mem, irn)) + return; + + //ir_fprintf(stderr, "considering %+F\n", irn); + if (get_nodes_block(irn) == pi->origin_block) { + if (is_Proj(irn)) { + ir_node *pred = get_Proj_pred(irn); + if (is_Load(pred) && + get_Load_volatility(pred) == volatility_non_volatile) { + ir_node *mem = get_Load_mem(pred); + //ir_nodeset_insert(&pi->this_mem, mem); + ir_nodeset_insert(&pi->user_mem, irn); + //ir_fprintf(stderr, "adding %+F to user set\n", irn); + parallelise_load(pi, mem); + return; + } else if (is_Store(pred) && + get_Store_volatility(pred) == volatility_non_volatile) { + ir_mode *org_mode = pi->origin_mode; + ir_node *org_ptr = pi->origin_ptr; + ir_mode *store_mode = get_irn_mode(get_Store_value(pred)); + ir_node *store_ptr = get_Store_ptr(pred); + if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) { + ir_node *mem = get_Store_mem(pred); + //ir_fprintf(stderr, "Ld after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode); + ir_nodeset_insert(&pi->user_mem, irn); + //ir_fprintf(stderr, "adding %+F to user set\n", irn); + parallelise_load(pi, mem); + return; + } + } + } else if (is_Sync(irn)) { + int n = get_Sync_n_preds(irn); + int i; + + for (i = 0; i < n; ++i) { + ir_node *sync_pred = get_Sync_pred(irn, i); + parallelise_load(pi, sync_pred); + } + return; + } + } + ir_nodeset_insert(&pi->this_mem, irn); + //ir_fprintf(stderr, "adding %+F to this set\n", irn); +} + + +static void parallelise_store(parallelise_info *pi, ir_node *irn) +{ + /* There is no point in investigating the same subgraph twice */ + if (ir_nodeset_contains(&pi->user_mem, irn)) + return; + + //ir_fprintf(stderr, "considering %+F\n", irn); + if (get_nodes_block(irn) == pi->origin_block) { + if (is_Proj(irn)) { + ir_node *pred = get_Proj_pred(irn); + if (is_Load(pred) && + get_Load_volatility(pred) == volatility_non_volatile) { + ir_mode *org_mode = pi->origin_mode; + ir_node *org_ptr = pi->origin_ptr; + ir_mode *load_mode = get_Load_mode(pred); + ir_node *load_ptr = get_Load_ptr(pred); + if (get_alias_relation(current_ir_graph, org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) { + ir_node *mem = get_Load_mem(pred); + //ir_fprintf(stderr, "St after Ld: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, load_ptr, load_mode); + ir_nodeset_insert(&pi->user_mem, irn); + //ir_fprintf(stderr, "adding %+F to user set\n", irn); + parallelise_store(pi, mem); + return; + } + } else if (is_Store(pred) && + get_Store_volatility(pred) == volatility_non_volatile) { + ir_mode *org_mode = pi->origin_mode; + ir_node *org_ptr = pi->origin_ptr; + ir_mode *store_mode = get_irn_mode(get_Store_value(pred)); + ir_node *store_ptr = get_Store_ptr(pred); + if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) { + ir_node *mem; + + //ir_fprintf(stderr, "St after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode); + ir_nodeset_insert(&pi->user_mem, irn); + //ir_fprintf(stderr, "adding %+F to user set\n", irn); + mem = get_Store_mem(pred); + parallelise_store(pi, mem); + return; + } + } + } else if (is_Sync(irn)) { + int n = get_Sync_n_preds(irn); + int i; + + for (i = 0; i < n; ++i) { + ir_node *sync_pred = get_Sync_pred(irn, i); + parallelise_store(pi, sync_pred); + } + return; + } + } + ir_nodeset_insert(&pi->this_mem, irn); + //ir_fprintf(stderr, "adding %+F to this set\n", irn); +} + + +static void walker(ir_node *proj, void *env) +{ + ir_node *mem_op; + ir_node *pred; + ir_node *block; + int n; + parallelise_info pi; + + (void)env; + + if (!is_Proj(proj)) return; + if (get_irn_mode(proj) != mode_M) return; + + mem_op = get_Proj_pred(proj); + if (is_Load(mem_op)) { + if (get_Load_volatility(mem_op) != volatility_non_volatile) return; + + block = get_nodes_block(mem_op); + pred = get_Load_mem(mem_op); + //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj); + + pi.origin_block = block, + pi.origin_ptr = get_Load_ptr(mem_op); + pi.origin_mode = get_Load_mode(mem_op); + ir_nodeset_init(&pi.this_mem); + ir_nodeset_init(&pi.user_mem); + + parallelise_load(&pi, pred); + } else if (is_Store(mem_op)) { + if (get_Store_volatility(mem_op) != volatility_non_volatile) return; + + block = get_nodes_block(mem_op); + pred = get_Store_mem(mem_op); + //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj); + + pi.origin_block = block, + pi.origin_ptr = get_Store_ptr(mem_op); + pi.origin_mode = get_irn_mode(get_Store_value(mem_op)); + ir_nodeset_init(&pi.this_mem); + ir_nodeset_init(&pi.user_mem); + + parallelise_store(&pi, pred); + } else { + return; + } + + n = ir_nodeset_size(&pi.user_mem); + if (n != 0) { /* nothing happened otherwise */ + ir_graph *irg = current_ir_graph; + ir_node *sync; + ir_node **in; + ir_nodeset_iterator_t iter; + int i; + + ++n; + //ir_fprintf(stderr, "creating sync for users of %+F with %d inputs\n", proj, n); + NEW_ARR_A(ir_node*, in, n); + i = 0; + in[i++] = new_r_Unknown(irg, mode_M); + ir_nodeset_iterator_init(&iter, &pi.user_mem); + for (;;) { + ir_node* p = ir_nodeset_iterator_next(&iter); + if (p == NULL) break; + in[i++] = p; + } + assert(i == n); + sync = new_r_Sync(irg, block, n, in); + exchange(proj, sync); + + assert(pn_Load_M == pn_Store_M); + proj = new_r_Proj(irg, block, mem_op, mode_M, pn_Load_M); + set_Sync_pred(sync, 0, proj); + + n = ir_nodeset_size(&pi.this_mem); + //ir_fprintf(stderr, "creating sync for %+F with %d inputs\n", mem_op, n); + ir_nodeset_iterator_init(&iter, &pi.this_mem); + if (n == 1) { + sync = ir_nodeset_iterator_next(&iter); + } else { + NEW_ARR_A(ir_node*, in, n); + i = 0; + for (;;) { + ir_node* p = ir_nodeset_iterator_next(&iter); + if (p == NULL) break; + in[i++] = p; + } + assert(i == n); + sync = new_r_Sync(irg, block, n, in); + } + set_memop_mem(mem_op, sync); + } + + ir_nodeset_destroy(&pi.this_mem); + ir_nodeset_destroy(&pi.user_mem); +} + + +void opt_sync(ir_graph *irg) +{ + //assure_irg_entity_usage_computed(irg); + //assure_irp_globals_entity_usage_computed(); + + irg_walk_graph(irg, NULL, walker, NULL); + //optimize_graph_df(irg); + //irg_walk_graph(irg, NormaliseSync, NULL, NULL); }