From 3910c38cb1f9dc1ec8792cd2b9234c802ab5ff9c Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 4 Sep 2009 13:19:44 +0000 Subject: [PATCH] new heursitic for good coloring order; add utility function to generate a postorder [r26488] --- ir/be/bemodule.c | 1 - ir/be/benewalloc.c | 138 +++++++++++++++++++++++++++++++++++++++--- ir/be/bespillbelady.c | 9 +-- ir/be/beutil.c | 22 +++++++ ir/be/beutil.h | 7 +++ 5 files changed, 161 insertions(+), 16 deletions(-) diff --git a/ir/be/bemodule.c b/ir/be/bemodule.c index 1036ddcb4..69f73c92f 100644 --- a/ir/be/bemodule.c +++ b/ir/be/bemodule.c @@ -59,7 +59,6 @@ void be_init_ra(void); void be_init_spillbelady(void); void be_init_spillbelady2(void); void be_init_spillbelady3(void); -//void be_init_spilllinearscan(void); void be_init_ssaconstr(void); void be_init_stabs(void); void be_init_straight_alloc(void); diff --git a/ir/be/benewalloc.c b/ir/be/benewalloc.c index db4161706..f94f8cb29 100644 --- a/ir/be/benewalloc.c +++ b/ir/be/benewalloc.c @@ -58,9 +58,12 @@ #include "irgraph_t.h" #include "irgwalk.h" #include "irnode_t.h" +#include "irprintf.h" #include "obst.h" #include "raw_bitset.h" #include "unionfind.h" +#include "pdeq.h" +#include "hungarian.h" #include "beabi.h" #include "bechordal_t.h" @@ -74,8 +77,7 @@ #include "bespill.h" #include "bespillutil.h" #include "beverify.h" - -#include "hungarian.h" +#include "beutil.h" #define USE_FACTOR 1.0f #define DEF_FACTOR 1.0f @@ -96,6 +98,8 @@ static const ir_exec_freq *execfreqs; static unsigned n_regs; static unsigned *normal_regs; static int *congruence_classes; +static ir_node **block_order; +static int n_block_order; /** currently active assignments (while processing a basic block) * maps registers to values(their current copies) */ @@ -1540,11 +1544,129 @@ static void allocate_coalesce_block(ir_node *block, void *data) } } +typedef struct block_costs_t block_costs_t; +struct block_costs_t { + float costs; /**< costs of the block */ + int dfs_num; /**< depth first search number (to detect backedges) */ +}; + +static int cmp_block_costs(const void *d1, const void *d2) +{ + const ir_node * const *block1 = d1; + const ir_node * const *block2 = d2; + const block_costs_t *info1 = get_irn_link(*block1); + const block_costs_t *info2 = get_irn_link(*block2); + return QSORT_CMP(info2->costs, info1->costs); +} + +static void determine_block_order(void) +{ + int i; + ir_node **blocklist = be_get_cfgpostorder(irg); + int n_blocks = ARR_LEN(blocklist); + int dfs_num = 0; + + /* clear block links... */ + for (i = 0; i < n_blocks; ++i) { + ir_node *block = blocklist[i]; + set_irn_link(block, NULL); + } + + /* walk blocks in reverse postorder, the costs for each block are the + * sum of the costs of its predecessors (excluding the costs on backedges + * which we can't determine) */ + for (i = n_blocks-1; i >= 0; --i) { + block_costs_t *cost_info; + ir_node *block = blocklist[i]; + + float execfreq = get_block_execfreq(execfreqs, block); + float costs = execfreq; + int n_cfgpreds = get_Block_n_cfgpreds(block); + int p; + for (p = 0; p < n_cfgpreds; ++p) { + ir_node *pred_block = get_Block_cfgpred_block(block, p); + block_costs_t *pred_costs = get_irn_link(pred_block); + /* we don't have any info for backedges */ + if (pred_costs == NULL) + continue; + costs += pred_costs->costs; + } + + cost_info = OALLOCZ(&obst, block_costs_t); + cost_info->costs = costs; + cost_info->dfs_num = dfs_num++; + set_irn_link(block, cost_info); + } + + /* sort array by block costs */ + qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs); + + ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED); + inc_irg_block_visited(irg); + + pdeq *worklist = new_pdeq(); + ir_node **order = XMALLOCN(ir_node*, n_blocks); + int order_p = 0; + for (i = 0; i < n_blocks; ++i) { + ir_node *block = blocklist[i]; + if (Block_block_visited(block)) + continue; + + /* continually add predecessors with highest costs to worklist + * (without using backedges) */ + do { + block_costs_t *info = get_irn_link(block); + ir_node *best_pred = NULL; + float best_costs = -1; + int n_cfgpred = get_Block_n_cfgpreds(block); + int i; + + pdeq_putr(worklist, block); + mark_Block_block_visited(block); + for (i = 0; i < n_cfgpred; ++i) { + ir_node *pred_block = get_Block_cfgpred_block(block, i); + block_costs_t *pred_info = get_irn_link(pred_block); + + /* ignore backedges */ + if (pred_info->dfs_num > info->dfs_num) + continue; + + if (info->costs > best_costs) { + best_costs = info->costs; + best_pred = pred_block; + } + } + block = best_pred; + } while(block != NULL && !Block_block_visited(block)); + + /* now put all nodes in the worklist in our final order */ + while (!pdeq_empty(worklist)) { + ir_node *pblock = pdeq_getr(worklist); + assert(order_p < n_blocks); + order[order_p++] = pblock; + } + } + assert(order_p == n_blocks); + del_pdeq(worklist); + + ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED); + + DEL_ARR_F(blocklist); + + obstack_free(&obst, NULL); + obstack_init(&obst); + + block_order = order; + n_block_order = n_blocks; +} + /** * Run the register allocator for the current register class. */ static void be_straight_alloc_cls(void) { + int i; + lv = be_assure_liveness(birg); be_liveness_assure_sets(lv); be_liveness_assure_chk(lv); @@ -1556,9 +1678,11 @@ static void be_straight_alloc_cls(void) be_clear_links(irg); irg_block_walk_graph(irg, NULL, analyze_block, NULL); combine_congruence_classes(); - /* we need some dominance pre-order walk to ensure we see all - * definitions/create copies before we encounter their users */ - dom_tree_walk_irg(irg, allocate_coalesce_block, NULL, NULL); + + for (i = 0; i < n_block_order; ++i) { + ir_node *block = block_order[i]; + allocate_coalesce_block(block, NULL); + } ir_free_resources(irg, IR_RESOURCE_IRN_LINK); } @@ -1609,8 +1733,8 @@ static void be_straight_alloc(be_irg_t *new_birg) irg = be_get_birg_irg(birg); execfreqs = birg->exec_freq; - /* TODO: extract some of the stuff from bechordal allocator, like - * statistics, time measurements, etc. and use them here too */ + /* determine a good coloring order */ + determine_block_order(); for (c = 0; c < n_cls; ++c) { cls = arch_env_get_reg_class(arch_env, c); diff --git a/ir/be/bespillbelady.c b/ir/be/bespillbelady.c index 5815e6e87..5147332ba 100644 --- a/ir/be/bespillbelady.c +++ b/ir/be/bespillbelady.c @@ -944,12 +944,6 @@ static void fix_block_borders(ir_node *block, void *data) } } -static void add_block(ir_node *block, void *data) -{ - (void) data; - ARR_APP1(ir_node*, blocklist, block); -} - static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls) { int i; @@ -980,8 +974,7 @@ static void be_spill_belady(be_irg_t *birg, const arch_register_class_t *rcls) uses = be_begin_uses(irg, lv); loop_ana = be_new_loop_pressure(birg, cls); senv = be_new_spill_env(birg); - blocklist = NEW_ARR_F(ir_node*, 0); - irg_block_edges_walk(get_irg_start_block(irg), NULL, add_block, NULL); + blocklist = be_get_cfgpostorder(irg); stat_ev_tim_pop("belady_time_init"); stat_ev_tim_push(); diff --git a/ir/be/beutil.c b/ir/be/beutil.c index 07fd04a38..2e884d0c7 100644 --- a/ir/be/beutil.c +++ b/ir/be/beutil.c @@ -163,3 +163,25 @@ FILE *be_ffopen(const char *base, const char *ext, const char *mode) { } return out; } + +static void add_to_postorder(ir_node *block, void *data) +{ + ir_node ***list = (ir_node***) data; + ARR_APP1(ir_node*, *list, block); +} + +ir_node **be_get_cfgpostorder(ir_graph *irg) +{ + ir_node **list = NEW_ARR_F(ir_node*, 0); + ir_node *end_block = get_irg_end_block(irg); + + /* end block may be unreachable in case of endless loops */ + if (get_Block_n_cfgpreds(end_block) == 0) + ARR_APP1(ir_node*, list, end_block); + + /* walk blocks */ + irg_block_edges_walk(get_irg_start_block(irg), NULL, add_to_postorder, + &list); + + return list; +} diff --git a/ir/be/beutil.h b/ir/be/beutil.h index 0bf0a1fd2..f40820fb7 100644 --- a/ir/be/beutil.h +++ b/ir/be/beutil.h @@ -150,6 +150,13 @@ unsigned get_num_reachable_nodes(ir_graph *irg); */ ir_node *be_get_Proj_for_pn(const ir_node *irn, long pn); +/** + * Returns an array (an ARR_F) of the programs blocks in reverse postorder + * (note: caller has to free the memory with DEL_ARR_F after use; + * of course you can use ARR_LEN on the array too.) + */ +ir_node **be_get_cfgpostorder(ir_graph *irg); + /** * Opens a file named base.ext with the mode mode. */ -- 2.20.1