X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fbenewalloc.c;h=2c709a75b49a09ca1ad7c6486b781f6bcbd14a7e;hb=c6af4586a08198d22de066573b3d0375c4e76a8a;hp=1ec0c73baddd0b365c9cbdd3f7d45fa9825ef218;hpb=b861b5c9edeb8f62f662d2104d27cd8319743155;p=libfirm diff --git a/ir/be/benewalloc.c b/ir/be/benewalloc.c index 1ec0c73ba..2c709a75b 100644 --- a/ir/be/benewalloc.c +++ b/ir/be/benewalloc.c @@ -49,6 +49,7 @@ #include #include +#include #include "error.h" #include "execfreq.h" @@ -58,9 +59,12 @@ #include "irgraph_t.h" #include "irgwalk.h" #include "irnode_t.h" +#include "irprintf.h" #include "obst.h" #include "raw_bitset.h" #include "unionfind.h" +#include "pdeq.h" +#include "hungarian.h" #include "beabi.h" #include "bechordal_t.h" @@ -74,15 +78,15 @@ #include "bespill.h" #include "bespillutil.h" #include "beverify.h" +#include "beutil.h" -#include "hungarian.h" - -#define USE_FACTOR 1.0f -#define DEF_FACTOR 1.0f -#define NEIGHBOR_FACTOR 0.2f -#define AFF_SHOULD_BE_SAME 0.5f -#define AFF_PHI 1.0f -#define SPLIT_DELTA 1.0f +#define USE_FACTOR 1.0f +#define DEF_FACTOR 1.0f +#define NEIGHBOR_FACTOR 0.2f +#define AFF_SHOULD_BE_SAME 0.5f +#define AFF_PHI 1.0f +#define SPLIT_DELTA 1.0f +#define MAX_OPTIMISTIC_SPLIT_RECURSION 2 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) @@ -96,6 +100,8 @@ static const ir_exec_freq *execfreqs; static unsigned n_regs; static unsigned *normal_regs; static int *congruence_classes; +static ir_node **block_order; +static int n_block_order; /** currently active assignments (while processing a basic block) * maps registers to values(their current copies) */ @@ -202,9 +208,10 @@ static void mark_as_copy_of(ir_node *copy, ir_node *value) /* the copy should not be linked to something else yet */ assert(copy_info->original_value == copy); + copy_info->original_value = original; + /* copy over allocation preferences */ memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0])); - copy_info->original_value = original; } /** @@ -221,6 +228,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, { ir_nodeset_iterator_t iter; unsigned r; + unsigned n_allowed; allocation_info_t *info = get_allocation_info(node); ir_node *neighbor; @@ -236,8 +244,12 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, if (live_nodes == NULL) return; - /* TODO: reduce penalty if there are multiple allowed registers... */ - penalty *= NEIGHBOR_FACTOR; + penalty *= NEIGHBOR_FACTOR; + n_allowed = rbitset_popcnt(limited, n_regs); + if (n_allowed > 1) { + /* only create a very weak penalty if multiple regs are allowed */ + penalty = (penalty * 0.8) / n_allowed; + } foreach_ir_nodeset(live_nodes, neighbor, iter) { allocation_info_t *neighbor_info; @@ -391,46 +403,145 @@ static void analyze_block(ir_node *block, void *data) ir_nodeset_destroy(&live_nodes); } -static void create_congurence_class(ir_node *node, void *data) +static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node) { - (void) data; - if (is_Phi(node)) { - int i; - int arity = get_irn_arity(node); - unsigned phi_idx = get_irn_idx(node); - phi_idx = uf_find(congruence_classes, phi_idx); - for (i = 0; i < arity; ++i) { - ir_node *op = get_Phi_pred(node, i); - int op_idx = get_irn_idx(op); - op_idx = uf_find(congruence_classes, op_idx); - phi_idx = uf_union(congruence_classes, phi_idx, op_idx); + const arch_register_req_t *req; + + if (get_irn_mode(node) == mode_T) { + const ir_edge_t *edge; + foreach_out_edge(node, edge) { + ir_node *def = get_edge_src_irn(edge); + congruence_def(live_nodes, def); } return; } + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + return; + /* should be same constraint? */ - if (is_Proj(node)) { - const arch_register_req_t *req = arch_get_register_req_out(node); - if (req->type & arch_register_req_type_should_be_same) { - ir_node *pred = get_Proj_pred(node); - int arity = get_irn_arity(pred); - int i; - unsigned node_idx = get_irn_idx(node); - node_idx = uf_find(congruence_classes, node_idx); - - for (i = 0; i < arity; ++i) { - ir_node *op; - unsigned op_idx; - - if (!rbitset_is_set(&req->other_same, i)) - continue; + req = arch_get_register_req_out(node); + if (req->type & arch_register_req_type_should_be_same) { + ir_node *insn = skip_Proj(node); + int arity = get_irn_arity(insn); + int i; + unsigned node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + + for (i = 0; i < arity; ++i) { + ir_node *live; + ir_node *op; + int op_idx; + ir_nodeset_iterator_t iter; + bool interferes = false; - op = get_irn_n(pred, i); - op_idx = get_irn_idx(op); - op_idx = uf_find(congruence_classes, op_idx); - node_idx = uf_union(congruence_classes, node_idx, op_idx); + if (!rbitset_is_set(&req->other_same, i)) + continue; + + op = get_irn_n(insn, i); + op_idx = get_irn_idx(op); + op_idx = uf_find(congruence_classes, op_idx); + + /* do we interfere with the value */ + foreach_ir_nodeset(live_nodes, live, iter) { + int lv_idx = get_irn_idx(live); + lv_idx = uf_find(congruence_classes, lv_idx); + if (lv_idx == op_idx) { + interferes = true; + break; + } } + /* don't put in same affinity class if we interfere */ + if (interferes) + continue; + + node_idx = uf_union(congruence_classes, node_idx, op_idx); + DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", + node, op)); + /* one should_be_same is enough... */ + break; + } + } +} + +static void create_congurence_class(ir_node *block, void *data) +{ + ir_nodeset_t live_nodes; + ir_node *node; + + (void) data; + ir_nodeset_init(&live_nodes); + be_liveness_end_of_block(lv, cls, block, &live_nodes); + + /* check should be same constraints */ + sched_foreach_reverse(block, node) { + if (is_Phi(node)) + break; + + congruence_def(&live_nodes, node); + be_liveness_transfer(cls, node, &live_nodes); + } + + /* check phi congruence classes */ + sched_foreach_reverse_from(node, node) { + int i; + int arity; + int node_idx; + assert(is_Phi(node)); + + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + node_idx = get_irn_idx(node); + node_idx = uf_find(congruence_classes, node_idx); + + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + bool interferes = false; + ir_nodeset_iterator_t iter; + ir_node *live; + ir_node *phi; + ir_node *op = get_Phi_pred(node, i); + int op_idx = get_irn_idx(op); + op_idx = uf_find(congruence_classes, op_idx); + + /* do we interfere with the value */ + foreach_ir_nodeset(&live_nodes, live, iter) { + int lv_idx = get_irn_idx(live); + lv_idx = uf_find(congruence_classes, lv_idx); + if (lv_idx == op_idx) { + interferes = true; + break; + } + } + /* don't put in same affinity class if we interfere */ + if (interferes) + continue; + /* any other phi has the same input? */ + sched_foreach(block, phi) { + ir_node *oop; + int oop_idx; + if (!is_Phi(phi)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, phi)) + continue; + oop = get_Phi_pred(phi, i); + if (oop == op) + continue; + oop_idx = get_irn_idx(oop); + oop_idx = uf_find(congruence_classes, oop_idx); + if (oop_idx == op_idx) { + interferes = true; + break; + } + } + if (interferes) + continue; + + node_idx = uf_union(congruence_classes, node_idx, op_idx); + DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", + node, op)); } - return; } } @@ -488,7 +599,7 @@ static void combine_congruence_classes(void) uf_init(congruence_classes, n); /* create congruence classes */ - irg_walk_graph(irg, create_congurence_class, NULL, NULL); + irg_block_walk_graph(irg, create_congurence_class, NULL, NULL); /* merge preferences */ irg_walk_graph(irg, merge_congruence_prefs, NULL, NULL); irg_walk_graph(irg, set_congruence_prefs, NULL, NULL); @@ -557,53 +668,109 @@ static void fill_sort_candidates(reg_pref_t *regprefs, static bool try_optimistic_split(ir_node *to_split, ir_node *before, float pref, float pref_delta, - unsigned *output_regs) + unsigned *forbidden_regs, int recursion) { + const arch_register_t *from_reg; const arch_register_t *reg; + ir_node *original_insn; ir_node *block; ir_node *copy; unsigned r; + unsigned from_r; unsigned i; allocation_info_t *info = get_allocation_info(to_split); reg_pref_t *prefs; float delta; + float split_threshold; (void) pref; + /* stupid hack: don't optimisticallt split don't spill nodes... + * (so we don't split away the values produced because of + * must_be_different constraints) */ + original_insn = skip_Proj(info->original_value); + if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill) + return false; + + from_reg = arch_get_irn_register(to_split); + from_r = arch_register_get_index(from_reg); + block = get_nodes_block(before); + split_threshold = get_block_execfreq(execfreqs, block) * SPLIT_DELTA; + + if (pref_delta < split_threshold*0.5) + return false; + /* find the best free position where we could move to */ prefs = ALLOCAN(reg_pref_t, n_regs); fill_sort_candidates(prefs, info); for (i = 0; i < n_regs; ++i) { + float apref; + float apref_delta; + bool res; + bool old_source_state; + + /* we need a normal register which is not an output register + an different from the current register of to_split */ r = prefs[i].num; if (!rbitset_is_set(normal_regs, r)) continue; - if (rbitset_is_set(output_regs, r)) + if (rbitset_is_set(forbidden_regs, r)) continue; + if (r == from_r) + continue; + + /* is the split worth it? */ + delta = pref_delta + prefs[i].pref; + if (delta < split_threshold) { + DB((dbg, LEVEL_3, "Not doing optimistical split of %+F (depth %d), win %f too low\n", + to_split, recursion, delta)); + return false; + } + + /* if the register is free then we can do the split */ if (assignments[r] == NULL) break; + + /* otherwise we might try recursively calling optimistic_split */ + if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION) + continue; + + apref = prefs[i].pref; + apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0; + apref_delta += pref_delta - split_threshold; + + /* our source register isn't a usefull destination for recursive + splits */ + old_source_state = rbitset_is_set(forbidden_regs, from_r); + rbitset_set(forbidden_regs, from_r); + /* try recursive split */ + res = try_optimistic_split(assignments[r], before, apref, + apref_delta, forbidden_regs, recursion+1); + /* restore our destination */ + if (old_source_state) { + rbitset_set(forbidden_regs, from_r); + } else { + rbitset_clear(forbidden_regs, from_r); + } + + if (res) + break; } - if (i >= n_regs) { - return false; - } - /* TODO: use execfreq somehow... */ - delta = pref_delta + prefs[i].pref; - if (delta < SPLIT_DELTA) { - DB((dbg, LEVEL_3, "Not doing optimistical split, win %f too low\n", - delta)); + if (i >= n_regs) return false; - } - reg = arch_register_for_index(cls, r); - block = get_nodes_block(before); + reg = arch_register_for_index(cls, r); copy = be_new_Copy(cls, block, to_split); mark_as_copy_of(copy, to_split); - free_reg_of_value(to_split); + /* hacky, but correct here */ + if (assignments[arch_register_get_index(from_reg)] == to_split) + free_reg_of_value(to_split); use_reg(copy, reg); sched_add_before(before, copy); DB((dbg, LEVEL_3, - "Optimistic live-range split %+F move %+F -> %s before %+F (win %f)\n", - copy, to_split, reg->name, before, delta)); + "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n", + copy, to_split, from_reg->name, reg->name, before, delta, recursion)); return true; } @@ -611,7 +778,7 @@ static bool try_optimistic_split(ir_node *to_split, ir_node *before, * Determine and assign a register for node @p node */ static void assign_reg(const ir_node *block, ir_node *node, - unsigned *output_regs) + unsigned *forbidden_regs) { const arch_register_t *reg; allocation_info_t *info; @@ -622,6 +789,7 @@ static void assign_reg(const ir_node *block, ir_node *node, const unsigned *allowed_regs; unsigned r; + assert(!is_Phi(node)); assert(arch_irn_consider_in_reg_alloc(cls, node)); /* preassigned register? */ @@ -691,16 +859,13 @@ static void assign_reg(const ir_node *block, ir_node *node, continue; if (assignments[r] == NULL) break; - if (!is_Phi(node)) { - float pref = reg_prefs[i].pref; - float delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0; - ir_node *before = skip_Proj(node); - bool res = try_optimistic_split(assignments[r], before, - pref, delta, - output_regs); - if (res) - break; - } + float pref = reg_prefs[i].pref; + float delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0; + ir_node *before = skip_Proj(node); + bool res = try_optimistic_split(assignments[r], before, + pref, delta, forbidden_regs, 0); + if (res) + break; } if (i >= n_regs) { panic("No register left for %+F\n", node); @@ -978,10 +1143,10 @@ static void determine_live_through_regs(unsigned *bitset, ir_node *node) * @param node the current node */ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, - unsigned *output_regs) + unsigned *forbidden_regs) { int arity = get_irn_arity(node); - int i, dummy, res; + int i, res; hungarian_problem_t *bp; unsigned l, r; unsigned *assignment; @@ -1036,7 +1201,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, determine_live_through_regs(live_through_regs, node); } - rbitset_or(output_regs, req->limited, n_regs); + rbitset_or(forbidden_regs, req->limited, n_regs); if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { good = false; } @@ -1049,7 +1214,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, determine_live_through_regs(live_through_regs, node); if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) { good = false; - rbitset_or(output_regs, req->limited, n_regs); + rbitset_or(forbidden_regs, req->limited, n_regs); } } } @@ -1083,7 +1248,7 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, continue; /* livethrough values may not use constrainted output registers */ if (rbitset_is_set(live_through_regs, l) - && rbitset_is_set(output_regs, r)) + && rbitset_is_set(forbidden_regs, r)) continue; hungarian_add(bp, r, l, l == r ? 9 : 8); @@ -1114,11 +1279,11 @@ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, } } - //hungarian_print_costmatrix(bp, 1); + //hungarian_print_cost_matrix(bp, 1); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); assignment = ALLOCAN(unsigned, n_regs); - res = hungarian_solve(bp, (int*) assignment, &dummy, 0); + res = hungarian_solve(bp, (int*) assignment, NULL, 0); assert(res == 0); #if 0 @@ -1209,7 +1374,7 @@ static void add_phi_permutations(ir_node *block, int p) if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; - a = find_value_in_block_info(pred_info, op); + a = find_value_in_block_info(pred_info, op); assert(a >= 0); reg = arch_get_irn_register(node); @@ -1292,7 +1457,7 @@ static void adapt_phi_prefs(ir_node *phi) * After a phi has been assigned a register propagate preference inputs * to the phi inputs. */ -static void propagate_phi_register(ir_node *phi, unsigned r) +static void propagate_phi_register(ir_node *phi, unsigned assigned_r) { int i; ir_node *block = get_nodes_block(phi); @@ -1302,16 +1467,109 @@ static void propagate_phi_register(ir_node *phi, unsigned r) ir_node *op = get_Phi_pred(phi, i); allocation_info_t *info = get_allocation_info(op); ir_node *pred_block = get_Block_cfgpred_block(block, i); + unsigned r; float weight = get_block_execfreq(execfreqs, pred_block) * AFF_PHI; - if (info->prefs[r] >= weight) + if (info->prefs[assigned_r] >= weight) continue; /* promote the prefered register */ - info->prefs[r] = AFF_PHI * weight; + for (r = 0; r < n_regs; ++r) { + if (r == assigned_r) { + info->prefs[r] = AFF_PHI * weight; + } else { + info->prefs[r] -= AFF_PHI * weight; + } + } + if (is_Phi(op)) - propagate_phi_register(op, r); + propagate_phi_register(op, assigned_r); + } +} + +static void assign_phi_registers(ir_node *block) +{ + int n_phis = 0; + int n; + int res; + int *assignment; + ir_node *node; + hungarian_problem_t *bp; + + /* count phi nodes */ + sched_foreach(block, node) { + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + ++n_phis; + } + + if (n_phis == 0) + return; + + /* build a bipartite matching problem for all phi nodes */ + bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); + n = 0; + sched_foreach(block, node) { + unsigned r; + + allocation_info_t *info; + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + /* give boni for predecessor colorings */ + adapt_phi_prefs(node); + /* add stuff to bipartite problem */ + info = get_allocation_info(node); + DB((dbg, LEVEL_3, "Prefs for %+F: ", node)); + for (r = 0; r < n_regs; ++r) { + float costs; + + if (!rbitset_is_set(normal_regs, r)) + continue; + + costs = info->prefs[r]; + costs = costs < 0 ? -logf(-costs+1) : logf(costs+1); + costs *= 100; + costs += 10000; + hungarian_add(bp, n, r, costs); + DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name, + info->prefs[r])); + } + DB((dbg, LEVEL_3, "\n")); + ++n; + } + + //hungarian_print_cost_matrix(bp, 7); + hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); + + assignment = ALLOCAN(int, n_regs); + res = hungarian_solve(bp, assignment, NULL, 0); + assert(res == 0); + + /* apply results */ + n = 0; + sched_foreach(block, node) { + unsigned r; + const arch_register_t *reg; + + if (!is_Phi(node)) + break; + if (!arch_irn_consider_in_reg_alloc(cls, node)) + continue; + + r = assignment[n++]; + assert(rbitset_is_set(normal_regs, r)); + reg = arch_register_for_index(cls, r); + DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); + use_reg(node, reg); + + /* adapt preferences for phi inputs */ + propagate_phi_register(node, r); } } @@ -1324,13 +1582,13 @@ static void allocate_coalesce_block(ir_node *block, void *data) int i; ir_nodeset_t live_nodes; ir_nodeset_iterator_t iter; - ir_node *node, *start; + ir_node *node; int n_preds; block_info_t *block_info; block_info_t **pred_block_infos; ir_node **phi_ins; - unsigned *output_regs; /**< collects registers which must not - be used for optimistic splits */ + unsigned *forbidden_regs; /**< collects registers which must + not be used for optimistic splits */ (void) data; DB((dbg, LEVEL_2, "* Block %+F\n", block)); @@ -1418,11 +1676,6 @@ static void allocate_coalesce_block(ir_node *block, void *data) /* if the node already has a register assigned use it */ reg = arch_get_irn_register(node); if (reg != NULL) { - /* TODO: consult pred-block infos here. The value could be copied - away in some/all predecessor blocks. We need to construct - phi-nodes in this case. - We even need to construct some Phi_0 like constructs in cases - where the predecessor allocation is not determined yet. */ use_reg(node, reg); } @@ -1430,58 +1683,48 @@ static void allocate_coalesce_block(ir_node *block, void *data) ir_nodeset_insert(&live_nodes, node); } - rbitset_alloca(output_regs, n_regs); + rbitset_alloca(forbidden_regs, n_regs); /* handle phis... */ - node = sched_first(block); - for ( ; is_Phi(node); node = sched_next(node)) { - const arch_register_t *reg; + assign_phi_registers(block); - if (!arch_irn_consider_in_reg_alloc(cls, node)) - continue; - - /* fill in regs already assigned */ - reg = arch_get_irn_register(node); - if (reg != NULL) { - use_reg(node, reg); - } else { - adapt_phi_prefs(node); - assign_reg(block, node, output_regs); - - reg = arch_get_irn_register(node); - propagate_phi_register(node, arch_register_get_index(reg)); - } - } - start = node; - - /* assign regs for live-in values */ + /* all live-ins must have a register */ +#ifdef DEBUG_libfirm foreach_ir_nodeset(&live_nodes, node, iter) { const arch_register_t *reg = arch_get_irn_register(node); - if (reg != NULL) - continue; - - assign_reg(block, node, output_regs); - /* shouldn't happen if we color in dominance order */ - assert (!is_Phi(node)); + assert(reg != NULL); } +#endif /* assign instructions in the block */ - for (node = start; !sched_is_end(node); node = sched_next(node)) { - unsigned r; + sched_foreach(block, node) { + int i; + int arity; + + /* phis are already assigned */ + if (is_Phi(node)) + continue; rewire_inputs(node); /* enforce use constraints */ - rbitset_clear_all(output_regs, n_regs); - enforce_constraints(&live_nodes, node, output_regs); - /* we may not use registers occupied here for optimistic splits */ - for (r = 0; r < n_regs; ++r) { - if (assignments[r] != NULL) - rbitset_set(output_regs, r); - } + rbitset_clear_all(forbidden_regs, n_regs); + enforce_constraints(&live_nodes, node, forbidden_regs); rewire_inputs(node); + /* we may not use registers used for inputs for optimistic splits */ + arity = get_irn_arity(node); + for (i = 0; i < arity; ++i) { + ir_node *op = get_irn_n(node, i); + const arch_register_t *reg; + if (!arch_irn_consider_in_reg_alloc(cls, op)) + continue; + + reg = arch_get_irn_register(op); + rbitset_set(forbidden_regs, arch_register_get_index(reg)); + } + /* free registers of values last used at this instruction */ free_last_uses(&live_nodes, node); @@ -1493,10 +1736,10 @@ static void allocate_coalesce_block(ir_node *block, void *data) ir_node *proj = get_edge_src_irn(edge); if (!arch_irn_consider_in_reg_alloc(cls, proj)) continue; - assign_reg(block, proj, output_regs); + assign_reg(block, proj, forbidden_regs); } } else if (arch_irn_consider_in_reg_alloc(cls, node)) { - assign_reg(block, node, output_regs); + assign_reg(block, node, forbidden_regs); } } @@ -1528,11 +1771,129 @@ static void allocate_coalesce_block(ir_node *block, void *data) } } +typedef struct block_costs_t block_costs_t; +struct block_costs_t { + float costs; /**< costs of the block */ + int dfs_num; /**< depth first search number (to detect backedges) */ +}; + +static int cmp_block_costs(const void *d1, const void *d2) +{ + const ir_node * const *block1 = d1; + const ir_node * const *block2 = d2; + const block_costs_t *info1 = get_irn_link(*block1); + const block_costs_t *info2 = get_irn_link(*block2); + return QSORT_CMP(info2->costs, info1->costs); +} + +static void determine_block_order(void) +{ + int i; + ir_node **blocklist = be_get_cfgpostorder(irg); + int n_blocks = ARR_LEN(blocklist); + int dfs_num = 0; + pdeq *worklist = new_pdeq(); + ir_node **order = XMALLOCN(ir_node*, n_blocks); + int order_p = 0; + + /* clear block links... */ + for (i = 0; i < n_blocks; ++i) { + ir_node *block = blocklist[i]; + set_irn_link(block, NULL); + } + + /* walk blocks in reverse postorder, the costs for each block are the + * sum of the costs of its predecessors (excluding the costs on backedges + * which we can't determine) */ + for (i = n_blocks-1; i >= 0; --i) { + block_costs_t *cost_info; + ir_node *block = blocklist[i]; + + float execfreq = get_block_execfreq(execfreqs, block); + float costs = execfreq; + int n_cfgpreds = get_Block_n_cfgpreds(block); + int p; + for (p = 0; p < n_cfgpreds; ++p) { + ir_node *pred_block = get_Block_cfgpred_block(block, p); + block_costs_t *pred_costs = get_irn_link(pred_block); + /* we don't have any info for backedges */ + if (pred_costs == NULL) + continue; + costs += pred_costs->costs; + } + + cost_info = OALLOCZ(&obst, block_costs_t); + cost_info->costs = costs; + cost_info->dfs_num = dfs_num++; + set_irn_link(block, cost_info); + } + + /* sort array by block costs */ + qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs); + + ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED); + inc_irg_block_visited(irg); + + for (i = 0; i < n_blocks; ++i) { + ir_node *block = blocklist[i]; + if (Block_block_visited(block)) + continue; + + /* continually add predecessors with highest costs to worklist + * (without using backedges) */ + do { + block_costs_t *info = get_irn_link(block); + ir_node *best_pred = NULL; + float best_costs = -1; + int n_cfgpred = get_Block_n_cfgpreds(block); + int i; + + pdeq_putr(worklist, block); + mark_Block_block_visited(block); + for (i = 0; i < n_cfgpred; ++i) { + ir_node *pred_block = get_Block_cfgpred_block(block, i); + block_costs_t *pred_info = get_irn_link(pred_block); + + /* ignore backedges */ + if (pred_info->dfs_num > info->dfs_num) + continue; + + if (info->costs > best_costs) { + best_costs = info->costs; + best_pred = pred_block; + } + } + block = best_pred; + } while(block != NULL && !Block_block_visited(block)); + + /* now put all nodes in the worklist in our final order */ + while (!pdeq_empty(worklist)) { + ir_node *pblock = pdeq_getr(worklist); + assert(order_p < n_blocks); + order[order_p++] = pblock; + } + } + assert(order_p == n_blocks); + del_pdeq(worklist); + + ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED); + + DEL_ARR_F(blocklist); + + obstack_free(&obst, NULL); + obstack_init(&obst); + + block_order = order; + n_block_order = n_blocks; +} + /** * Run the register allocator for the current register class. */ static void be_straight_alloc_cls(void) { + int i; + lv = be_assure_liveness(birg); be_liveness_assure_sets(lv); be_liveness_assure_chk(lv); @@ -1544,9 +1905,11 @@ static void be_straight_alloc_cls(void) be_clear_links(irg); irg_block_walk_graph(irg, NULL, analyze_block, NULL); combine_congruence_classes(); - /* we need some dominance pre-order walk to ensure we see all - * definitions/create copies before we encounter their users */ - dom_tree_walk_irg(irg, allocate_coalesce_block, NULL, NULL); + + for (i = 0; i < n_block_order; ++i) { + ir_node *block = block_order[i]; + allocate_coalesce_block(block, NULL); + } ir_free_resources(irg, IR_RESOURCE_IRN_LINK); } @@ -1597,8 +1960,8 @@ static void be_straight_alloc(be_irg_t *new_birg) irg = be_get_birg_irg(birg); execfreqs = birg->exec_freq; - /* TODO: extract some of the stuff from bechordal allocator, like - * statistics, time measurements, etc. and use them here too */ + /* determine a good coloring order */ + determine_block_order(); for (c = 0; c < n_cls; ++c) { cls = arch_env_get_reg_class(arch_env, c);