new heuristic for good coloring order; add utility function to generate a postorder
author Matthias Braun <matze@braunis.de>
Fri, 4 Sep 2009 13:19:44 +0000 (13:19 +0000)
committer Matthias Braun <matze@braunis.de>
Fri, 4 Sep 2009 13:19:44 +0000 (13:19 +0000)
[r26488]

ir/be/bemodule.c
ir/be/benewalloc.c
ir/be/bespillbelady.c
ir/be/beutil.c
ir/be/beutil.h

index 1036ddc..69f73c9 100644 (file)
@@ -59,7 +59,6 @@ void be_init_ra(void);
 void be_init_spillbelady(void);
 void be_init_spillbelady2(void);
 void be_init_spillbelady3(void);
-//void be_init_spilllinearscan(void);
 void be_init_ssaconstr(void);
 void be_init_stabs(void);
 void be_init_straight_alloc(void);
index db41617..f94f8cb 100644 (file)
 #include "irgraph_t.h"
 #include "irgwalk.h"
 #include "irnode_t.h"
+#include "irprintf.h"
 #include "obst.h"
 #include "raw_bitset.h"
 #include "unionfind.h"
+#include "pdeq.h"
+#include "hungarian.h"
 
 #include "beabi.h"
 #include "bechordal_t.h"
@@ -74,8 +77,7 @@
 #include "bespill.h"
 #include "bespillutil.h"
 #include "beverify.h"
-
-#include "hungarian.h"
+#include "beutil.h"
 
 #define USE_FACTOR         1.0f
 #define DEF_FACTOR         1.0f
@@ -96,6 +98,8 @@ static const ir_exec_freq          *execfreqs;
 static unsigned                     n_regs;
 static unsigned                    *normal_regs;
 static int                         *congruence_classes;
+static ir_node                    **block_order;
+static int                          n_block_order;
 
 /** currently active assignments (while processing a basic block)
  * maps registers to values(their current copies) */
@@ -1540,11 +1544,129 @@ static void allocate_coalesce_block(ir_node *block, void *data)
        }
 }
 
+typedef struct block_costs_t block_costs_t;
+struct block_costs_t {
+       float costs;   /**< costs of the block */
+       int   dfs_num; /**< depth first search number (to detect backedges) */
+};
+
+static int cmp_block_costs(const void *d1, const void *d2)
+{
+       const ir_node       * const *block1 = d1;
+       const ir_node       * const *block2 = d2;
+       const block_costs_t *info1  = get_irn_link(*block1);
+       const block_costs_t *info2  = get_irn_link(*block2);
+       return QSORT_CMP(info2->costs, info1->costs);
+}
+
+static void determine_block_order(void)
+{
+       int i;
+       ir_node **blocklist = be_get_cfgpostorder(irg);
+       int       n_blocks  = ARR_LEN(blocklist);
+       int       dfs_num   = 0;
+
+       /* clear block links... */
+       for (i = 0; i < n_blocks; ++i) {
+               ir_node *block = blocklist[i];
+               set_irn_link(block, NULL);
+       }
+
+       /* walk blocks in reverse postorder; a block's costs are its own
+        * execution frequency plus the costs of all its predecessors
+        * (excluding costs on backedges, which we can't determine) */
+       for (i = n_blocks-1; i >= 0; --i) {
+               block_costs_t *cost_info;
+               ir_node *block = blocklist[i];
+
+               float execfreq   = get_block_execfreq(execfreqs, block);
+               float costs      = execfreq;
+               int   n_cfgpreds = get_Block_n_cfgpreds(block);
+               int   p;
+               for (p = 0; p < n_cfgpreds; ++p) {
+                       ir_node       *pred_block = get_Block_cfgpred_block(block, p);
+                       block_costs_t *pred_costs = get_irn_link(pred_block);
+                       /* we don't have any info for backedges */
+                       if (pred_costs == NULL)
+                               continue;
+                       costs += pred_costs->costs;
+               }
+
+               cost_info          = OALLOCZ(&obst, block_costs_t);
+               cost_info->costs   = costs;
+               cost_info->dfs_num = dfs_num++;
+               set_irn_link(block, cost_info);
+       }
+
+       /* sort array by block costs */
+       qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs);
+
+       ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED);
+       inc_irg_block_visited(irg);
+
+       pdeq     *worklist = new_pdeq();
+       ir_node **order    = XMALLOCN(ir_node*, n_blocks);
+       int       order_p  = 0;
+       for (i = 0; i < n_blocks; ++i) {
+               ir_node       *block = blocklist[i];
+               if (Block_block_visited(block))
+                       continue;
+
+               /* follow the chain of highest-cost predecessors (ignoring
+                * backedges) and put each block on the worklist */
+               do {
+                       block_costs_t *info       = get_irn_link(block);
+                       ir_node       *best_pred  = NULL;
+                       float          best_costs = -1;
+                       int            n_cfgpred  = get_Block_n_cfgpreds(block);
+                       int            i;
+
+                       pdeq_putr(worklist, block);
+                       mark_Block_block_visited(block);
+                       for (i = 0; i < n_cfgpred; ++i) {
+                               ir_node       *pred_block = get_Block_cfgpred_block(block, i);
+                               block_costs_t *pred_info  = get_irn_link(pred_block);
+
+                               /* ignore backedges */
+                               if (pred_info->dfs_num > info->dfs_num)
+                                       continue;
+
+                               if (pred_info->costs > best_costs) {
+                                       best_costs = pred_info->costs;
+                                       best_pred  = pred_block;
+                               }
+                       }
+                       block = best_pred;
+               } while (block != NULL && !Block_block_visited(block));
+
+               /* now put all nodes in the worklist in our final order */
+               while (!pdeq_empty(worklist)) {
+                       ir_node *pblock = pdeq_getr(worklist);
+                       assert(order_p < n_blocks);
+                       order[order_p++] = pblock;
+               }
+       }
+       assert(order_p == n_blocks);
+       del_pdeq(worklist);
+
+       ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED);
+
+       DEL_ARR_F(blocklist);
+
+       obstack_free(&obst, NULL);
+       obstack_init(&obst);
+
+       block_order   = order;
+       n_block_order = n_blocks;
+}
+
 /**
  * Run the register allocator for the current register class.
  */
 static void be_straight_alloc_cls(void)
 {
+       int i;
+
        lv = be_assure_liveness(birg);
        be_liveness_assure_sets(lv);
        be_liveness_assure_chk(lv);
@@ -1556,9 +1678,11 @@ static void be_straight_alloc_cls(void)
        be_clear_links(irg);
        irg_block_walk_graph(irg, NULL, analyze_block, NULL);
        combine_congruence_classes();
-       /* we need some dominance pre-order walk to ensure we see all
-        *  definitions/create copies before we encounter their users */
-       dom_tree_walk_irg(irg, allocate_coalesce_block, NULL, NULL);
+
+       for (i = 0; i < n_block_order; ++i) {
+               ir_node *block = block_order[i];
+               allocate_coalesce_block(block, NULL);
+       }
 
        ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
 }
@@ -1609,8 +1733,8 @@ static void be_straight_alloc(be_irg_t *new_birg)
        irg       = be_get_birg_irg(birg);
        execfreqs = birg->exec_freq;
 
-       /* TODO: extract some of the stuff from bechordal allocator, like
-        * statistics, time measurements, etc. and use them here too */
+       /* determine a good coloring order */
+       determine_block_order();
 
        for (c = 0; c < n_cls; ++c) {
                cls             = arch_env_get_reg_class(arch_env, c);
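
As a side note on the heuristic itself: determine_block_order() first propagates costs along forward edges (a block's costs are its own execution frequency plus the accumulated costs of its non-backedge predecessors) and then sorts the blocks by descending costs, so the hottest chains get colored first. The stand-alone sketch below mirrors those two passes on a made-up diamond CFG; demo_block_t, the block names, and the example frequencies are invented for illustration and are not libfirm types.

#include <stdio.h>
#include <stdlib.h>

/* a tiny stand-in for a basic block (not a libfirm type) */
typedef struct demo_block_t {
	const char *name;
	float       freq;     /* execution frequency of the block          */
	int         n_preds;  /* number of forward-edge predecessors       */
	int         preds[2]; /* indices of those predecessors in blocks[] */
	float       costs;    /* accumulated costs, computed in main()     */
} demo_block_t;

/* blocks listed in reverse postorder: every forward-edge predecessor
 * appears before its successors */
static demo_block_t blocks[] = {
	{ "start", 1.0f, 0, { 0, 0 }, 0.0f },
	{ "then",  0.9f, 1, { 0, 0 }, 0.0f }, /* hot branch  */
	{ "else",  0.1f, 1, { 0, 0 }, 0.0f }, /* cold branch */
	{ "join",  1.0f, 2, { 1, 2 }, 0.0f },
};
#define N_BLOCKS (sizeof(blocks) / sizeof(blocks[0]))

/* sort by descending costs, analogous to cmp_block_costs() above */
static int cmp_costs(const void *d1, const void *d2)
{
	const demo_block_t *b1 = *(demo_block_t *const *) d1;
	const demo_block_t *b2 = *(demo_block_t *const *) d2;
	return (b2->costs > b1->costs) - (b2->costs < b1->costs);
}

int main(void)
{
	size_t        i;
	demo_block_t *order[N_BLOCKS];

	/* pass 1: propagate costs along forward edges in reverse postorder */
	for (i = 0; i < N_BLOCKS; ++i) {
		int p;
		blocks[i].costs = blocks[i].freq;
		for (p = 0; p < blocks[i].n_preds; ++p)
			blocks[i].costs += blocks[blocks[i].preds[p]].costs;
	}

	/* pass 2: sort the blocks by descending accumulated costs */
	for (i = 0; i < N_BLOCKS; ++i)
		order[i] = &blocks[i];
	qsort(order, N_BLOCKS, sizeof(order[0]), cmp_costs);

	/* prints: join (4.00), then (1.90), else (1.10), start (1.00) */
	for (i = 0; i < N_BLOCKS; ++i)
		printf("%-6s costs=%.2f\n", order[i]->name, order[i]->costs);
	return 0;
}
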
index 5815e6e..5147332 100644 (file)
@@ -944,12 +944,6 @@ static void fix_block_borders(ir_node *block, void *data)
        }
 }
 
-static void add_block(ir_node *block, void *data)
-{
-       (void) data;
-       ARR_APP1(ir_node*, blocklist, block);
-}
-
 static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
 {
        int i;
@@ -980,8 +974,7 @@ static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls)
        uses      = be_begin_uses(irg, lv);
        loop_ana  = be_new_loop_pressure(birg, cls);
        senv      = be_new_spill_env(birg);
-       blocklist = NEW_ARR_F(ir_node*, 0);
-       irg_block_edges_walk(get_irg_start_block(irg), NULL, add_block, NULL);
+       blocklist = be_get_cfgpostorder(irg);
        stat_ev_tim_pop("belady_time_init");
 
        stat_ev_tim_push();
index 07fd04a..2e884d0 100644 (file)
@@ -163,3 +163,25 @@ FILE *be_ffopen(const char *base, const char *ext, const char *mode) {
        }
        return out;
 }
+
+static void add_to_postorder(ir_node *block, void *data)
+{
+       ir_node ***list = (ir_node***) data;
+       ARR_APP1(ir_node*, *list, block);
+}
+
+ir_node **be_get_cfgpostorder(ir_graph *irg)
+{
+       ir_node **list      = NEW_ARR_F(ir_node*, 0);
+       ir_node  *end_block = get_irg_end_block(irg);
+
+       /* end block may be unreachable in case of endless loops */
+       if (get_Block_n_cfgpreds(end_block) == 0)
+               ARR_APP1(ir_node*, list, end_block);
+
+       /* walk blocks */
+       irg_block_edges_walk(get_irg_start_block(irg), NULL, add_to_postorder,
+                            &list);
+
+       return list;
+}
index 0bf0a1f..f40820f 100644 (file)
@@ -150,6 +150,13 @@ unsigned get_num_reachable_nodes(ir_graph *irg);
  */
 ir_node *be_get_Proj_for_pn(const ir_node *irn, long pn);
 
+/**
+ * Returns an array (an ARR_F) of the program's blocks in CFG postorder
+ * (note: caller has to free the memory with DEL_ARR_F after use;
+ *  of course you can use ARR_LEN on the array too.)
+ */
+ir_node **be_get_cfgpostorder(ir_graph *irg);
+
 /**
  * Opens a file named base.ext with the mode mode.
  */
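
For completeness, a minimal caller of the new helper might look like the following sketch; the wrapper function is hypothetical, and the "array.h" include is assumed to be the libfirm header providing the ARR_F macros (ARR_LEN/DEL_ARR_F) used elsewhere in this change.

#include "irgraph.h"
#include "irnode.h"
#include "array.h"  /* ARR_LEN / DEL_ARR_F (assumed header) */
#include "beutil.h" /* be_get_cfgpostorder()                */

/* hypothetical helper: visit all blocks via the postorder array */
static void visit_blocks_in_postorder(ir_graph *irg)
{
	ir_node **blocks   = be_get_cfgpostorder(irg);
	int       n_blocks = ARR_LEN(blocks);
	int       i;

	for (i = 0; i < n_blocks; ++i) {
		ir_node *block = blocks[i];
		/* ... process block (iterate backwards instead to get a
		 * reverse postorder, as determine_block_order() does) ... */
		(void) block;
	}

	/* the result is an ARR_F, so the caller has to free it */
	DEL_ARR_F(blocks);
}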