add more passes
[libfirm] / ir / opt / ldst2.c
index 4372bac..0c72988 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
+ * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
  *
  * This file is part of libFirm.
  *
  * @file
  * @brief   parallelizing Load/Store optimisation
  * @author  Christoph Mallon
- * @version $Id$
+ * @version $Id$
  */
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
-#include "array.h"
+#include "iroptimize.h"
+
+#include "array_t.h"
 #include "debug.h"
 #include "ircons.h"
 #include "irgraph.h"
 #include "irmemory.h"
 #include "irnode.h"
 #include "irnodeset.h"
-#include "ldst2.h"
 #include "obst.h"
-#include "return.h"
 #include "irdump.h"
+#include "irflag_t.h"
+#include "irprintf.h"
+#include "irtools.h"
 
-
+#if 0
 #define OPTIMISE_LOAD_AFTER_LOAD
 
 
@@ -92,7 +93,7 @@ static void CollectAddresses(ir_graph* irg)
                for (i = 0; i < count_addrs; i++) {
                        ir_node* addr = ir_nodeset_iterator_next(&addr_iter);
                        assert(addr != NULL);
-                       set_irn_link(addr, (void*)(uintptr_t)i);
+                       set_irn_link(addr, (void *)i);
                        addrs[i] = addr;
                        DB((dbg, LEVEL_2, "===> Collected unique symbolic address %+F\n", addr));
                }
@@ -104,6 +105,7 @@ static void AliasSetAdder(ir_node* block, void* env)
 {
        ir_nodeset_t* alias_set;
        size_t i;
+       (void) env;
 
        alias_set = NEW_ARR_D(ir_nodeset_t, &obst, count_addrs);
        for (i = 0; i < count_addrs; i++) {
@@ -134,6 +136,7 @@ static void AliasSetDestroyer(ir_node* block, void* env)
 {
        ir_nodeset_t* alias_set = get_irn_link(block);
        size_t i;
+       (void) env;
 
        for (i = 0; i < count_addrs; i++) {
                ir_nodeset_destroy(&alias_set[i]);
@@ -153,7 +156,7 @@ static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode,
        } else if (is_Store(other)) {
                other_addr = get_Store_ptr(other);
        } else {
-               return may_alias;
+               return ir_may_alias;
        }
 
        other_mode = get_irn_mode(other);
@@ -161,6 +164,14 @@ static ir_alias_relation AliasTest(ir_graph* irg, ir_node* addr, ir_mode* mode,
 }
 
 
+static int in_cmp(void const* va, void const* vb)
+{
+       ir_node const* const a = *(ir_node const*const*)va;
+       ir_node const* const b = *(ir_node const*const*)vb;
+       return get_irn_idx(a) - get_irn_idx(b);
+}
+
+
 static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_set)
 {
        size_t set_size = ir_nodeset_size(after_set);
@@ -179,6 +190,7 @@ static ir_node* GenerateSync(ir_graph* irg, ir_node* block, ir_nodeset_t* after_
                for (i = 0; i < set_size; i++) {
                        in[i] = ir_nodeset_iterator_next(&iter);
                }
+               qsort(in, set_size, sizeof(*in), in_cmp);
                return new_r_Sync(irg, block, set_size, in);
        }
 }
@@ -243,7 +255,7 @@ static void WalkMemPhi(ir_graph* irg, ir_node* block, ir_node* phi)
 static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* memory)
 {
        ir_node* addr = get_Load_ptr(load);
-       size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
+       size_t addr_idx = (size_t)get_irn_link(addr);
        ir_nodeset_t* interfere_sets = get_irn_link(block);
        ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
        size_t size = ir_nodeset_size(interfere_set);
@@ -254,27 +266,18 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem
        ir_nodeset_iterator_init(&interfere_iter, interfere_set);
        if (size == 1) {
                ir_node* after = ir_nodeset_iterator_next(&interfere_iter);
-               if (is_Proj(after)) {
-                       ir_node* pred = get_Proj_pred(after);
-                       if (is_Load(pred)) {
-#ifdef OPTIMISE_LOAD_AFTER_LOAD
-                               if (get_Load_ptr(pred) == addr && get_Load_mode(pred) == get_Load_mode(load)) {
-                                       exchange(load, pred);
-                                       return;
-                               }
-#endif
-                               after = get_Load_mem(pred);
-                       }
-               }
+               assert(!is_Proj(after) || !is_Load(get_Proj_pred(after)));
                DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, after));
                set_Load_mem(load, after);
        } else {
                ir_node** after_set;
-               ir_node* sync;
+               ir_node* after;
+               ir_node* mem;
+               size_t i;
 
                NEW_ARR_A(ir_node*, after_set, size);
-               for (i = 0; i < size; i++) {
-                       ir_node* mem = ir_nodeset_iterator_next(&interfere_iter);
+               i = 0;
+               while ((mem = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
                        if (is_Proj(mem)) {
                                ir_node* pred = get_Proj_pred(mem);
                                if (is_Load(pred)) {
@@ -284,13 +287,19 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem
                                                return;
                                        }
 #endif
-                                       mem = get_Load_mem(pred);
+                                       continue;
                                }
                        }
-                       after_set[i] = mem;
-                       sync = new_r_Sync(irg, block, size, after_set);
+                       DB((dbg, LEVEL_3, "===> %+F must be executed after %+F\n", load, mem));
+                       after_set[i++] = mem;
+               }
+               assert(i != 0);
+               if (i == 1) {
+                       after = after_set[0];
+               } else {
+                       after = new_r_Sync(irg, block, i, after_set);
                }
-               set_Load_mem(load, sync);
+               set_Load_mem(load, after);
        }
 
        for (i = 0; i < count_addrs; i++) {
@@ -298,23 +307,13 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem
                ir_node* other_addr = addrs[i];
                ir_mode* other_mode = mode; // XXX second mode is nonsense
                ir_alias_relation rel = get_alias_relation(irg, addr, mode, other_addr, other_mode);
-               ir_node* other_node;
 
                DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
-               if (rel == no_alias) {
+               if (rel == ir_no_alias) {
                        continue;
                }
                DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", load, other_addr));
 
-               ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
-               while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
-                       if (is_Proj(other_node) && is_Load(get_Proj_pred(other_node))) continue;
-                       if (AliasTest(irg, addr, mode, other_node) != no_alias) {
-                               DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], load));
-                               ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
-                       }
-               }
-
                ir_nodeset_insert(&interfere_sets[i], memory);
        }
 }
@@ -323,7 +322,7 @@ static void PlaceLoad(ir_graph* irg, ir_node* block, ir_node* load, ir_node* mem
 static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* memory)
 {
        ir_node* addr = get_Store_ptr(store);
-       size_t addr_idx = (size_t)(uintptr_t)get_irn_link(addr);
+       size_t addr_idx = (size_t)get_irn_link(addr);
        ir_nodeset_t* interfere_sets = get_irn_link(block);
        ir_nodeset_t* interfere_set = &interfere_sets[addr_idx];
        ir_node* after;
@@ -341,14 +340,14 @@ static void PlaceStore(ir_graph* irg, ir_node* block, ir_node* store, ir_node* m
                ir_node* other_node;
 
                DB((dbg, LEVEL_3, "===> Testing for alias between %+F and %+F. Relation is %d\n", addr, other_addr, rel));
-               if (rel == no_alias) {
+               if (rel == ir_no_alias) {
                        continue;
                }
                DB((dbg, LEVEL_3, "===> %+F potentially aliases address %+F\n", store, other_addr));
 
                ir_nodeset_iterator_init(&interfere_iter, &interfere_sets[i]);
                while ((other_node = ir_nodeset_iterator_next(&interfere_iter)) != NULL) {
-                       if (AliasTest(irg, addr, mode, other_node) != no_alias) {
+                       if (AliasTest(irg, addr, mode, other_node) != ir_no_alias) {
                                DB((dbg, LEVEL_3, "===> Removing %+F from execute-after set of %+F due to %+F\n", other_node, addrs[i], store));
                                ir_nodeset_remove_iterator(&interfere_sets[i], &interfere_iter);
                        }
@@ -370,7 +369,7 @@ static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
        if (block != last_block) {
                DB((dbg, LEVEL_3, "===> Changing block from %+F to %+F\n", last_block, block));
                block_change = 1;
-               if (Block_not_block_visited(block)) {
+               if (!Block_block_visited(block)) {
                        mark_Block_block_visited(block);
                } else {
                        DB((dbg, LEVEL_2, "===> Hit already visited block at %+F\n", node));
@@ -410,11 +409,12 @@ static int WalkMem(ir_graph* irg, ir_node* node, ir_node* last_block)
                                ir_node* unknown;
 
                                DB((dbg, LEVEL_3, "===> The predecessor was not finished yet\n"));
-                               assert(!Block_not_block_visited(pred_block));
+                               assert(Block_block_visited(pred_block));
 
                                unknown = new_r_Unknown(irg, mode_M);
                                for (i = 0; i < count_addrs; i++) {
                                        ir_node* phi_unk = new_r_Phi(irg, block, 1, &unknown, mode_M);
+                                       DB((dbg, LEVEL_3, "===> Placing unfinished %+F for %+F in %+F\n", phi_unk, addrs[i], block));
                                        set_irn_link(phi_unk, unfinished_phis[i]);
                                        unfinished_phis[i] = phi_unk;
                                        ir_nodeset_insert(&thissets[i], phi_unk);
@@ -524,7 +524,7 @@ static void Detotalise(ir_graph* irg)
        size_t npreds = get_Block_n_cfgpreds(end_block);
        size_t i;
 
-       unfinished_phis = xmalloc(sizeof(*unfinished_phis) * count_addrs);
+       unfinished_phis = XMALLOCN(ir_node, count_addrs);
        for (i = 0; i < count_addrs; i++) {
                unfinished_phis[i] = NULL;
        }
@@ -539,8 +539,10 @@ static void Detotalise(ir_graph* irg)
        FinalisePhis(irg);
        xfree(unfinished_phis);
 }
+#endif
 
 
+#if 0
 static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
 {
        size_t n = get_Sync_n_preds(sync);
@@ -556,7 +558,6 @@ static void AddSyncPreds(ir_nodeset_t* preds, ir_node* sync)
        }
 }
 
-
 static void NormaliseSync(ir_node* node, void* env)
 {
        ir_nodeset_t preds;
@@ -564,6 +565,7 @@ static void NormaliseSync(ir_node* node, void* env)
        ir_node** in;
        size_t count_preds;
        size_t i;
+       (void) env;
 
        if (!is_Sync(node)) return;
 
@@ -571,7 +573,7 @@ static void NormaliseSync(ir_node* node, void* env)
        AddSyncPreds(&preds, node);
 
        count_preds = ir_nodeset_size(&preds);
-       if (count_preds != get_Sync_n_preds(node)) {
+       if (count_preds != (unsigned)get_Sync_n_preds(node)) {
                NEW_ARR_A(ir_node*, in, count_preds);
                ir_nodeset_iterator_init(&iter, &preds);
                for (i = 0; i < count_preds; i++) {
@@ -585,13 +587,13 @@ static void NormaliseSync(ir_node* node, void* env)
        ir_nodeset_destroy(&preds);
 }
 
-
 void opt_ldst2(ir_graph* irg)
 {
        FIRM_DBG_REGISTER(dbg, "firm.opt.ldst2");
        DB((dbg, LEVEL_1, "===> Performing load/store optimisation on %+F\n", irg));
 
        normalize_one_return(irg);
+       dump_ir_block_graph(irg, "-prefluffig");
 
        obstack_init(&obst);
 
@@ -608,7 +610,6 @@ void opt_ldst2(ir_graph* irg)
        inc_irg_block_visited(irg);
        SetStartAddressesTop(irg);
        Detotalise(irg);
-
        dump_ir_block_graph(irg, "-fluffig");
 
        irg_block_walk_graph(irg, AliasSetDestroyer, NULL, NULL);
@@ -618,4 +619,235 @@ void opt_ldst2(ir_graph* irg)
        irg_walk_graph(irg, NormaliseSync, NULL, NULL);
   optimize_graph_df(irg);
        irg_walk_graph(irg, NormaliseSync, NULL, NULL);
+       dump_ir_block_graph(irg, "-postfluffig");
+}
+#endif
+
+
+typedef struct parallelise_info
+{
+       ir_node      *origin_block;
+       ir_node      *origin_ptr;
+       ir_mode      *origin_mode;
+       ir_nodeset_t  this_mem;
+       ir_nodeset_t  user_mem;
+} parallelise_info;
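
The struct above captures the starting point of each walk: origin_block confines the walk to a single block, origin_ptr and origin_mode feed the alias queries, this_mem collects the memory values the operation really has to depend on, and user_mem collects the memory Projs that were bypassed. A hedged sketch of the alias query both walkers perform against a preceding non-volatile Store, written as a stand-alone helper (hypothetical; the patch inlines it, using exactly these calls):

/* Hypothetical helper, not part of the patch: may the memory op described by
 * pi be reordered with respect to the preceding Store 'store'? */
static int origin_independent_of_store(const parallelise_info *pi, ir_node *store)
{
	ir_node *store_ptr  = get_Store_ptr(store);
	ir_mode *store_mode = get_irn_mode(get_Store_value(store));
	return get_alias_relation(current_ir_graph,
	                          pi->origin_ptr, pi->origin_mode,
	                          store_ptr, store_mode) == ir_no_alias;
}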
+
+
+static void parallelise_load(parallelise_info *pi, ir_node *irn)
+{
+       /* There is no point in investigating the same subgraph twice */
+       if (ir_nodeset_contains(&pi->user_mem, irn))
+               return;
+
+       //ir_fprintf(stderr, "considering %+F\n", irn);
+       if (get_nodes_block(irn) == pi->origin_block) {
+               if (is_Proj(irn)) {
+                       ir_node *pred = get_Proj_pred(irn);
+                       if (is_Load(pred) &&
+                                       get_Load_volatility(pred) == volatility_non_volatile) {
+                               ir_node *mem = get_Load_mem(pred);
+                               //ir_nodeset_insert(&pi->this_mem, mem);
+                               ir_nodeset_insert(&pi->user_mem, irn);
+                               //ir_fprintf(stderr, "adding %+F to user set\n", irn);
+                               parallelise_load(pi, mem);
+                               return;
+                       } else if (is_Store(pred) &&
+                                       get_Store_volatility(pred) == volatility_non_volatile) {
+                               ir_mode *org_mode   = pi->origin_mode;
+                               ir_node *org_ptr    = pi->origin_ptr;
+                               ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
+                               ir_node *store_ptr  = get_Store_ptr(pred);
+                               if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
+                                       ir_node *mem = get_Store_mem(pred);
+                                       //ir_fprintf(stderr, "Ld after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode);
+                                       ir_nodeset_insert(&pi->user_mem, irn);
+                                       //ir_fprintf(stderr, "adding %+F to user set\n", irn);
+                                       parallelise_load(pi, mem);
+                                       return;
+                               }
+                       }
+               } else if (is_Sync(irn)) {
+                       int n = get_Sync_n_preds(irn);
+                       int i;
+
+                       for (i = 0; i < n; ++i) {
+                               ir_node *sync_pred = get_Sync_pred(irn, i);
+                               parallelise_load(pi, sync_pred);
+                       }
+                       return;
+               }
+       }
+       ir_nodeset_insert(&pi->this_mem, irn);
+       //ir_fprintf(stderr, "adding %+F to this set\n", irn);
+}
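
parallelise_load walks up the memory chain from the Load's memory input; non-volatile Loads and provably non-aliasing Stores in the same block are recorded in user_mem and skipped, everything else ends up in this_mem as a real dependency. An illustrative C input (not taken from the patch) where this pays off, assuming the alias analysis can tell distinct globals apart:

/* Illustrative only: with distinct global entities the alias query can return
 * ir_no_alias, so after the pass the two loads no longer serialize behind the
 * store to z. */
extern void use(int);
int x, y, z;

void example_loads(void)
{
	z = 1;        /* Store to z                                       */
	int a = x;    /* Load from x: may bypass the store to z           */
	int b = y;    /* Load from y: independent of the store and of x   */
	use(a + b);
}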
+
+
+static void parallelise_store(parallelise_info *pi, ir_node *irn)
+{
+       /* There is no point in investigating the same subgraph twice */
+       if (ir_nodeset_contains(&pi->user_mem, irn))
+               return;
+
+       //ir_fprintf(stderr, "considering %+F\n", irn);
+       if (get_nodes_block(irn) == pi->origin_block) {
+               if (is_Proj(irn)) {
+                       ir_node *pred = get_Proj_pred(irn);
+                       if (is_Load(pred) &&
+                                       get_Load_volatility(pred) == volatility_non_volatile) {
+                               ir_mode *org_mode  = pi->origin_mode;
+                               ir_node *org_ptr   = pi->origin_ptr;
+                               ir_mode *load_mode = get_Load_mode(pred);
+                               ir_node *load_ptr  = get_Load_ptr(pred);
+                               if (get_alias_relation(current_ir_graph, org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
+                                       ir_node *mem = get_Load_mem(pred);
+                                       //ir_fprintf(stderr, "St after Ld: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, load_ptr, load_mode);
+                                       ir_nodeset_insert(&pi->user_mem, irn);
+                                       //ir_fprintf(stderr, "adding %+F to user set\n", irn);
+                                       parallelise_store(pi, mem);
+                                       return;
+                               }
+                       } else if (is_Store(pred) &&
+                                       get_Store_volatility(pred) == volatility_non_volatile) {
+                               ir_mode *org_mode   = pi->origin_mode;
+                               ir_node *org_ptr    = pi->origin_ptr;
+                               ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
+                               ir_node *store_ptr  = get_Store_ptr(pred);
+                               if (get_alias_relation(current_ir_graph, org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
+                                       ir_node *mem;
+
+                                       //ir_fprintf(stderr, "St after St: %+F (%+F) does not alias %+F (%+F)\n", org_ptr, org_mode, store_ptr, store_mode);
+                                       ir_nodeset_insert(&pi->user_mem, irn);
+                                       //ir_fprintf(stderr, "adding %+F to user set\n", irn);
+                                       mem = get_Store_mem(pred);
+                                       parallelise_store(pi, mem);
+                                       return;
+                               }
+                       }
+               } else if (is_Sync(irn)) {
+                       int n = get_Sync_n_preds(irn);
+                       int i;
+
+                       for (i = 0; i < n; ++i) {
+                               ir_node *sync_pred = get_Sync_pred(irn, i);
+                               parallelise_store(pi, sync_pred);
+                       }
+                       return;
+               }
+       }
+       ir_nodeset_insert(&pi->this_mem, irn);
+       //ir_fprintf(stderr, "adding %+F to this set\n", irn);
+}
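
parallelise_store mirrors parallelise_load; the only asymmetry is that the load walker skips a preceding non-volatile Load without an alias query, since two reads never conflict. Volatile accesses are never bypassed by either walker, so they keep their place in the memory chain, as in this illustrative C input (not from the patch):

/* Illustrative only: the volatile store terminates the walk, so the second
 * load keeps its ordering behind it. */
extern int a, b;
extern volatile int flag;

int example_volatile(void)
{
	int x = a;
	flag = 1;     /* volatile Store: the walk stops here                  */
	int y = b;    /* this Load stays ordered after the volatile Store     */
	return x + y;
}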
+
+
+static void walker(ir_node *proj, void *env)
+{
+       ir_node          *mem_op;
+       ir_node          *pred;
+       ir_node          *block;
+       int               n;
+       parallelise_info  pi;
+
+       (void)env;
+
+       if (!is_Proj(proj)) return;
+       if (get_irn_mode(proj) != mode_M) return;
+
+       mem_op = get_Proj_pred(proj);
+       if (is_Load(mem_op)) {
+               if (get_Load_volatility(mem_op) != volatility_non_volatile) return;
+
+               block = get_nodes_block(mem_op);
+               pred  = get_Load_mem(mem_op);
+               //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);
+
+               pi.origin_block = block,
+               pi.origin_ptr   = get_Load_ptr(mem_op);
+               pi.origin_mode  = get_Load_mode(mem_op);
+               ir_nodeset_init(&pi.this_mem);
+               ir_nodeset_init(&pi.user_mem);
+
+               parallelise_load(&pi, pred);
+       } else if (is_Store(mem_op)) {
+               if (get_Store_volatility(mem_op) != volatility_non_volatile) return;
+
+               block = get_nodes_block(mem_op);
+               pred  = get_Store_mem(mem_op);
+               //ir_fprintf(stderr, "starting parallelise at %+F for %+F\n", pred, proj);
+
+               pi.origin_block = block,
+               pi.origin_ptr   = get_Store_ptr(mem_op);
+               pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
+               ir_nodeset_init(&pi.this_mem);
+               ir_nodeset_init(&pi.user_mem);
+
+               parallelise_store(&pi, pred);
+       } else {
+               return;
+       }
+
+       n = ir_nodeset_size(&pi.user_mem);
+       if (n != 0) { /* nothing happened otherwise */
+               ir_graph               *irg  = current_ir_graph;
+               ir_node                *sync;
+               ir_node               **in;
+               ir_nodeset_iterator_t   iter;
+               int                     i;
+
+               ++n;
+               //ir_fprintf(stderr, "creating sync for users of %+F with %d inputs\n", proj, n);
+               NEW_ARR_A(ir_node*, in, n);
+               i = 0;
+               in[i++] = new_r_Unknown(irg, mode_M);
+               ir_nodeset_iterator_init(&iter, &pi.user_mem);
+               for (;;) {
+                       ir_node* p = ir_nodeset_iterator_next(&iter);
+                       if (p == NULL) break;
+                       in[i++] = p;
+               }
+               assert(i == n);
+               sync = new_r_Sync(block, n, in);
+               exchange(proj, sync);
+
+               assert(pn_Load_M == pn_Store_M);
+               proj = new_r_Proj(block, mem_op, mode_M, pn_Load_M);
+               set_Sync_pred(sync, 0, proj);
+
+               n = ir_nodeset_size(&pi.this_mem);
+               //ir_fprintf(stderr, "creating sync for %+F with %d inputs\n", mem_op, n);
+               ir_nodeset_iterator_init(&iter, &pi.this_mem);
+               if (n == 1) {
+                       sync = ir_nodeset_iterator_next(&iter);
+               } else {
+                       NEW_ARR_A(ir_node*, in, n);
+                       i = 0;
+                       for (;;) {
+                               ir_node* p = ir_nodeset_iterator_next(&iter);
+                               if (p == NULL) break;
+                               in[i++] = p;
+                       }
+                       assert(i == n);
+                       sync = new_r_Sync(block, n, in);
+               }
+               set_memop_mem(mem_op, sync);
+       }
+
+       ir_nodeset_destroy(&pi.this_mem);
+       ir_nodeset_destroy(&pi.user_mem);
+}
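
Note the ordering in the branch above: the user-side Sync is created with an Unknown in slot 0 and only patched with the fresh Proj after exchange(), presumably so that node identification cannot hand back the very Proj that is being replaced. The memory input of mem_op itself is then rebuilt from this_mem using the usual build-a-Sync-from-a-set pattern; a hypothetical stand-alone version of that pattern, using only calls that already appear in this file:

/* Hypothetical helper, not in the patch: mirrors the tail of walker(). */
static ir_node *sync_from_set(ir_node *block, ir_nodeset_t *set)
{
	int                    n = ir_nodeset_size(set);
	ir_nodeset_iterator_t  iter;
	ir_node              **in;
	ir_node               *p;
	int                    i = 0;

	ir_nodeset_iterator_init(&iter, set);
	if (n == 1)
		return ir_nodeset_iterator_next(&iter);

	NEW_ARR_A(ir_node*, in, n);
	while ((p = ir_nodeset_iterator_next(&iter)) != NULL)
		in[i++] = p;
	return new_r_Sync(block, n, in);
}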
+
+
+void opt_sync(ir_graph *irg)
+{
+       //assure_irg_entity_usage_computed(irg);
+       //assure_irp_globals_entity_usage_computed();
+
+       irg_walk_graph(irg, NULL, walker, NULL);
+       //optimize_graph_df(irg);
+       //irg_walk_graph(irg, NormaliseSync, NULL, NULL);
+}
+
+ir_graph_pass_t *opt_sync_pass(const char *name, int verify, int dump)
+{
+       return def_graph_pass(name ? name : "opt_sync", verify, dump, opt_sync);
 }
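
opt_sync_pass only wraps opt_sync via def_graph_pass so the optimisation can be scheduled through the pass manager; calling opt_sync directly works just as well. A minimal driver sketch, assuming the usual irprog accessors get_irp_n_irgs() and get_irp_irg():

/* Minimal sketch, not part of the patch: run the new pass on every graph. */
static void run_opt_sync_everywhere(void)
{
	int i, n = get_irp_n_irgs();
	for (i = 0; i < n; ++i) {
		ir_graph *irg = get_irp_irg(i);
		opt_sync(irg);
	}
}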