3 * More experiments on coalescing.
4 * @author Sebastian Hack
13 #include <libcore/lc_opts.h>
14 #include <libcore/lc_opts_enum.h>
15 #endif /* WITH_LIBCORE */
25 #include "bitfiddle.h"
27 #include "irphase_t.h"
28 #include "irgraph_t.h"
35 #include "becopyopt.h"
36 #include "becopyopt_t.h"
37 #include "bechordal_t.h"
42 #define DUMP_ALL 2 * DUMP_CLOUD - 1
44 static int dump_flags = 0;
45 static int subtree_iter = 4;
46 static double constr_factor = 0.5;
48 /* Options using libcore */
51 static const lc_opt_enum_mask_items_t dump_items[] = {
52 { "before", DUMP_BEFORE },
53 { "after", DUMP_AFTER },
54 { "cloud", DUMP_CLOUD },
59 static lc_opt_enum_mask_var_t dump_var = {
60 &dump_flags, dump_items
63 static const lc_opt_table_entry_t options[] = {
64 LC_OPT_ENT_ENUM_MASK("dump", "dump ifg before, after or after each cloud", &dump_var),
65 LC_OPT_ENT_INT ("iter", "iterations for subtree nodes (standard: 3)", &subtree_iter),
66 LC_OPT_ENT_DBL ("cf", "factor of constraint importance (between 0.0 and 1.0)", &constr_factor),
70 void be_co2_register_options(lc_opt_entry_t *grp)
72 lc_opt_entry_t *co2_grp = lc_opt_get_grp(grp, "co2");
73 lc_opt_add_table(co2_grp, options);
79 / ___|| |_ __ _ _ __| |_
80 \___ \| __/ _` | '__| __|
81 ___) | || (_| | | | |_
82 |____/ \__\__,_|_| \__|
86 #define INFEASIBLE(cost) ((cost) == INT_MAX)
88 static be_ifg_dump_dot_cb_t ifg_dot_cb;
90 typedef unsigned col_t;
92 typedef struct _co2_irn_t co2_irn_t;
93 typedef struct _co2_cloud_t co2_cloud_t;
94 typedef struct _co2_cloud_irn_t co2_cloud_irn_t;
104 bitset_t *ignore_regs;
108 struct list_head cloud_head;
109 DEBUG_ONLY(firm_dbg_module_t *dbg;)
114 affinity_node_t *aff;
115 co2_irn_t *touched_next;
118 int last_color_change;
121 unsigned tmp_fixed : 1;
122 unsigned is_constrained : 1;
123 struct list_head changed_list;
126 struct _co2_cloud_irn_t {
127 struct _co2_irn_t inh;
131 co2_cloud_irn_t *mst_parent;
134 co2_cloud_irn_t **mst_childs;
139 col_cost_pair_t *tmp_coloring;
140 struct list_head cloud_list;
141 struct list_head mst_list;
144 struct _co2_cloud_t {
156 co2_cloud_irn_t *master;
157 co2_cloud_irn_t *mst_root;
158 co2_cloud_irn_t **seq;
159 struct list_head members_head;
160 struct list_head list;
163 #define FRONT_BASE(ci,col) ((ci)->fronts + col * (ci)->mst_n_childs)
165 #define get_co2_irn(co2, irn) ((co2_irn_t *) phase_get_or_set_irn_data(&co2->ph, irn))
166 #define get_co2_cloud_irn(co2, irn) ((co2_cloud_irn_t *) phase_get_or_set_irn_data(&co2->ph, irn))
168 static void *co2_irn_init(phase_t *ph, ir_node *irn, void *data)
170 co2_t *env = (co2_t *) ph;
171 affinity_node_t *a = get_affinity_info(env->co, irn);
172 size_t size = a ? sizeof(co2_cloud_irn_t) : sizeof(co2_irn_t);
173 co2_irn_t *ci = data ? data : phase_alloc(ph, size);
176 INIT_LIST_HEAD(&ci->changed_list);
177 ci->touched_next = env->touched;
178 ci->orig_col = get_irn_col(env->co, irn);
184 co2_cloud_irn_t *cci = (co2_cloud_irn_t *) ci;
185 INIT_LIST_HEAD(&cci->cloud_list);
186 cci->mst_parent = cci;
192 #define CLOUD_WEIGHT(c) ((1 - constr_factor) * (c)->costs + constr_factor * (c)->freedom)
194 static int cmp_clouds_gt(const void *a, const void *b)
196 const co2_cloud_t **p = a;
197 const co2_cloud_t **q = b;
198 double c = CLOUD_WEIGHT(*p);
199 double d = CLOUD_WEIGHT(*q);
200 return QSORT_CMP(d, c);
204 * An order on color/costs pairs.
205 * If the costs are equal, we use the color as a kind of normalization.
207 static int col_cost_pair_lt(const void *a, const void *b)
209 const col_cost_pair_t *p = a;
210 const col_cost_pair_t *q = b;
213 return QSORT_CMP(c, d);
216 static col_t get_col(co2_t *env, ir_node *irn)
218 co2_irn_t *ci = get_co2_irn(env, irn);
219 return ci->tmp_fixed ? ci->tmp_col : ci->orig_col;
222 static INLINE int color_is_fix(co2_t *env, ir_node *irn)
224 co2_irn_t *ci = get_co2_irn(env, irn);
225 return ci->fixed || ci->tmp_fixed;
228 static INLINE bitset_t *get_adm(co2_t *env, co2_irn_t *ci)
231 arch_register_req_t req;
232 ci->adm_cache = bitset_obstack_alloc(phase_obst(&env->ph), env->n_regs);
233 arch_get_register_req(env->co->aenv, &req, ci->irn, BE_OUT_POS(0));
234 if(arch_register_req_is(&req, limited)) {
235 req.limited(req.limited_env, ci->adm_cache);
236 ci->is_constrained = 1;
239 bitset_copy(ci->adm_cache, env->ignore_regs);
240 bitset_flip_all(ci->adm_cache);
244 return ci->adm_cache;
247 static INLINE bitset_t *admissible_colors(co2_t *env, co2_irn_t *ci, bitset_t *bs)
249 bitset_copy(bs, get_adm(env, ci));
253 static INLINE int is_color_admissible(co2_t *env, co2_irn_t *ci, col_t col)
255 bitset_t *bs = get_adm(env, ci);
256 return bitset_is_set(bs, col);
259 static INLINE int is_constrained(co2_t *env, co2_irn_t *ci)
263 return ci->is_constrained;
266 static void incur_constraint_costs(co2_t *env, ir_node *irn, col_cost_pair_t *col_costs, int costs)
268 bitset_t *aux = bitset_alloca(env->co->cls->n_regs);
269 arch_register_req_t req;
271 arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0));
273 if(arch_register_req_is(&req, limited)) {
277 req.limited(req.limited_env, aux);
278 n_constr = bitset_popcnt(aux);
279 bitset_foreach(aux, elm) {
280 col_costs[elm].costs = add_saturated(col_costs[elm].costs, costs / n_constr);
286 * Determine costs which shall indicate how cheap/expensive it is to try
287 * to assign a node some color.
288 * The costs are computed for all colors. INT_MAX means that it is impossible
289 * to give the node that specific color.
291 * @param env The co2 this pointer.
292 * @param irn The node.
293 * @param col_costs An array of colors x costs where the costs are written to.
295 static void determine_color_costs(co2_t *env, co2_irn_t *ci, col_cost_pair_t *col_costs)
297 ir_node *irn = ci->irn;
298 be_ifg_t *ifg = env->co->cenv->ifg;
299 int n_regs = env->co->cls->n_regs;
300 bitset_t *forb = bitset_alloca(n_regs);
301 affinity_node_t *a = ci->aff;
308 /* Put all forbidden colors into the aux bitset. */
309 admissible_colors(env, ci, forb);
310 bitset_flip_all(forb);
312 for(i = 0; i < n_regs; ++i) {
313 col_costs[i].col = i;
314 col_costs[i].costs = 0;
320 co_gs_foreach_neighb(a, n) {
321 if(color_is_fix(env, n->irn)) {
322 col_t col = get_col(env, n->irn);
323 col_costs[col].costs = add_saturated(col_costs[col].costs, -n->costs * 128);
326 incur_constraint_costs(env, n->irn, col_costs, -n->costs);
330 it = be_ifg_neighbours_iter_alloca(ifg);
331 be_ifg_foreach_neighbour(ifg, it, irn, pos) {
332 col_t col = get_col(env, pos);
333 if(color_is_fix(env, pos)) {
334 col_costs[col].costs = INT_MAX;
337 incur_constraint_costs(env, pos, col_costs, INT_MAX);
338 col_costs[col].costs = add_saturated(col_costs[col].costs, 8 * be_ifg_degree(ifg, pos));
341 be_ifg_neighbours_break(ifg, it);
343 /* Set the costs to infinity for each color which is not allowed at this node. */
344 bitset_foreach(forb, elm) {
345 col_costs[elm].costs = INT_MAX;
350 static void single_color_cost(co2_t *env, co2_irn_t *ci, col_t col, col_cost_pair_t *seq)
352 int n_regs = env->co->cls->n_regs;
355 for(i = 0; i < n_regs; ++i) {
357 seq[i].costs = INT_MAX;
360 assert(is_color_admissible(env, ci, col));
366 static void reject_coloring(struct list_head *h)
370 list_for_each_entry(co2_irn_t, pos, h, changed_list)
374 static void materialize_coloring(struct list_head *h)
378 list_for_each_entry(co2_irn_t, pos, h, changed_list) {
379 pos->orig_col = pos->tmp_col;
389 static col_entry_t *save_coloring(struct obstack *obst, struct list_head *changed)
394 list_for_each_entry(co2_irn_t, pos, changed, changed_list) {
396 ent.col = pos->tmp_col;
398 obstack_grow(obst, &ent, sizeof(ent));
400 memset(&ent, 0, sizeof(ent));
401 obstack_grow(obst, &ent, sizeof(ent));
402 return obstack_finish(obst);
405 static int change_color_not(co2_t *env, ir_node *irn, col_t not_col, struct list_head *parent_changed, int depth);
406 static int change_color_single(co2_t *env, ir_node *irn, col_t tgt_col, struct list_head *parent_changed, int depth);
408 static int recolor(co2_t *env, ir_node *irn, col_cost_pair_t *col_list, struct list_head *parent_changed, int depth)
410 int n_regs = env->co->cls->n_regs;
411 be_ifg_t *ifg = env->co->cenv->ifg;
412 co2_irn_t *ci = get_co2_irn(env, irn);
418 for(i = 0; i < n_regs; ++i) {
419 col_t tgt_col = col_list[i].col;
420 unsigned costs = col_list[i].costs;
423 struct list_head changed;
427 DBG((env->dbg, LEVEL_3, "\t\t%2{firm:indent}trying color %d(%d) on %+F\n", depth, tgt_col, costs, irn));
429 /* If the costs for that color (and all successive) are infinite, bail out we won't make it anyway. */
430 if(INFEASIBLE(costs)) {
431 DB((env->dbg, LEVEL_4, "\t\t%2{firm:indent}color %d infeasible\n", depth, tgt_col));
436 /* Set the new color of the node and mark the node as temporarily fixed. */
437 ci->tmp_col = tgt_col;
441 If that color has costs > 0, there's at least one neighbor having that color,
442 so we will try to change the neighbors' colors, too.
444 INIT_LIST_HEAD(&changed);
445 list_add(&ci->changed_list, &changed);
447 it = be_ifg_neighbours_iter_alloca(ifg);
448 be_ifg_foreach_neighbour(ifg, it, irn, n) {
450 /* try to re-color the neighbor if it has the target color. */
451 if(get_col(env, n) == tgt_col) {
452 struct list_head tmp;
455 Try to change the color of the neighbor and record all nodes which
456 get changed in the tmp list. Add this list to the "changed" list for
457 that color. If we did not succeed to change the color of the neighbor,
458 we bail out and try the next color.
460 INIT_LIST_HEAD(&tmp);
461 neigh_ok = change_color_not(env, n, tgt_col, &tmp, depth + 1);
462 list_splice(&tmp, &changed);
467 be_ifg_neighbours_break(ifg, it);
470 We managed to assign the target color to all neighbors, so from the perspective
471 of the current node, every thing was ok and we can return safely.
474 DBG((env->dbg, LEVEL_3, "\t\t%2{firm:indent}color %d(%d) was ok\n", depth, tgt_col, costs));
475 list_splice(&changed, parent_changed);
481 If not, that color did not succeed and we unfix all nodes we touched
482 by traversing the changed list and setting tmp_fixed to 0 for these nodes.
485 reject_coloring(&changed);
491 static int change_color_not(co2_t *env, ir_node *irn, col_t not_col, struct list_head *parent_changed, int depth)
493 co2_irn_t *ci = get_co2_irn(env, irn);
495 col_t col = get_col(env, irn);
497 DBG((env->dbg, LEVEL_3, "\t\t%2{firm:indent}clearing %+F(%d) of color %d\n", depth, irn, col, not_col));
499 /* the node does not have to forbidden color. That's fine, mark it as visited and return. */
506 list_add(&ci->changed_list, parent_changed);
510 /* The node has the color it should not have _and_ has not been visited yet. */
511 if(!color_is_fix(env, irn)) {
512 int n_regs = env->co->cls->n_regs;
513 col_cost_pair_t *csts = alloca(n_regs * sizeof(csts[0]));
515 /* Get the costs for giving the node a specific color. */
516 determine_color_costs(env, ci, csts);
518 /* Since the node must not have the not_col, set the costs for that color to "infinity" */
519 csts[not_col].costs = INT_MAX;
521 /* sort the colors according costs, cheapest first. */
522 qsort(csts, n_regs, sizeof(csts[0]), col_cost_pair_lt);
524 /* Try recoloring the node using the color list. */
525 res = recolor(env, irn, csts, parent_changed, depth);
528 /* If we came here, everything went ok. */
532 static int change_color_single(co2_t *env, ir_node *irn, col_t tgt_col, struct list_head *parent_changed, int depth)
534 co2_irn_t *ci = get_co2_irn(env, irn);
535 col_t col = get_col(env, irn);
538 DBG((env->dbg, LEVEL_3, "\t\t%2{firm:indent}trying to set %+F(%d) to color %d\n", depth, irn, col, tgt_col));
540 /* the node has the wanted color. That's fine, mark it as visited and return. */
545 list_add(&ci->changed_list, parent_changed);
552 if(!color_is_fix(env, irn) && is_color_admissible(env, ci, tgt_col)) {
553 int n_regs = env->co->cls->n_regs;
554 col_cost_pair_t *seq = alloca(n_regs * sizeof(seq[0]));
556 /* Get the costs for giving the node a specific color. */
557 single_color_cost(env, ci, tgt_col, seq);
559 /* Try recoloring the node using the color list. */
560 res = recolor(env, irn, seq, parent_changed, depth);
565 DB((env->dbg, LEVEL_3, "\t\t%2{firm:indent}color %d %s for %+F\n", depth, tgt_col, res ? "was ok" : "failed", irn));
569 static void front_inval_color(co2_cloud_irn_t *ci, col_t col)
571 int *base = FRONT_BASE(ci, col);
572 memset(base, -1, ci->mst_n_childs * sizeof(base[0]));
576 co2_cloud_irn_t *src, *tgt;
580 int cmp_edges(const void *a, const void *b)
584 return QSORT_CMP(q->costs, p->costs);
587 static co2_cloud_irn_t *find_mst_root(co2_cloud_irn_t *ci)
589 while(ci != ci->mst_parent)
595 static int cmp_parent(const void *a, const void *b)
597 const co2_cloud_irn_t *p = a;
598 const co2_cloud_irn_t *q = b;
599 return QSORT_CMP(q->mst_costs, p->mst_costs);
602 static void fill_tmp_coloring(co2_cloud_irn_t *ci, col_t col)
604 int n_regs = ci->cloud->env->n_regs;
607 for(i = 0; i < ci->mst_n_childs; ++i) {
608 co2_cloud_irn_t *c = ci->mst_childs[i];
609 for(j = 0; j < n_regs; ++j) {
610 int costs = c->col_costs[j];
611 if(INFEASIBLE(costs))
612 c->tmp_coloring[j].costs = INT_MAX;
614 int add = j != (int) col ? c->mst_costs : 0;
615 c->tmp_coloring[j].costs = add + costs;
617 c->tmp_coloring[j].col = j;
619 qsort(c->tmp_coloring, n_regs, sizeof(c->tmp_coloring[0]), col_cost_pair_lt);
623 static void determine_start_colors(co2_cloud_irn_t *ci, col_cost_pair_t *seq)
625 int n_regs = ci->cloud->env->n_regs;
626 bitset_t *adm = bitset_alloca(n_regs);
629 // TODO: Prefer some colors depending on the neighbors, etc.
631 admissible_colors(ci->cloud->env, &ci->inh, adm);
632 for(i = 0; i < n_regs; ++i) {
635 if (!bitset_is_set(adm, i))
636 seq[i].costs = INT_MAX;
639 for(j = 0; j < ci->mst_n_childs; ++j) {
640 co2_cloud_irn_t *child = ci->mst_childs[j];
641 if (!INFEASIBLE(child->col_costs[i]))
642 seq[i].costs -= ci->mst_childs[j]->col_costs[i];
647 qsort(seq, n_regs, sizeof(seq[0]), col_cost_pair_lt);
650 static int push_front(co2_cloud_irn_t *ci, int *front)
652 co2_t *env = ci->cloud->env;
653 int n_regs = env->n_regs;
654 int min_diff = INT_MAX;
658 for(i = 0; i < ci->mst_n_childs; ++i) {
659 co2_cloud_irn_t *child = ci->mst_childs[i];
663 if(idx + 1 < n_regs) {
664 int diff = child->tmp_coloring[idx].costs - child->tmp_coloring[idx + 1].costs;
665 if(diff < min_diff) {
673 co2_cloud_irn_t *child = ci->mst_childs[min_chld];
674 DBG((env->dbg, LEVEL_3, "\tsmallest diff with child %+F on index %d is %d\n", child->inh.irn, front[min_chld], min_diff));
675 front[min_chld] += 1;
681 static int color_subtree(co2_cloud_irn_t *ci, col_t col, struct list_head *changed, int depth)
683 int n_childs = ci->mst_n_childs;
685 select the front for the given color.
686 The front will determine the colors of the children.
688 int *front = FRONT_BASE(ci, col);
691 ok = change_color_single(ci->cloud->env, ci->inh.irn, col, changed, 0);
692 for(i = 0; i < n_childs && ok; ++i) {
693 co2_cloud_irn_t *child = ci->mst_childs[i];
694 col_t col = front[i];
696 ok = color_subtree(child, col, changed, depth + 1);
702 static int try_coloring(co2_cloud_irn_t *ci, col_t col, int *front, int *initial_ok, int depth)
704 co2_t *env = ci->cloud->env;
705 struct list_head changed;
708 INIT_LIST_HEAD(&changed);
709 *initial_ok = ok = change_color_single(env, ci->inh.irn, col, &changed, depth + 1);
711 for (i = 0; i < ci->mst_n_childs && ok; ++i) {
712 co2_cloud_irn_t *child = ci->mst_childs[i];
713 col_t tgt_col = child->tmp_coloring[front[i]].col;
715 ok = color_subtree(child, tgt_col, &changed, depth + 1);
718 reject_coloring(&changed);
723 static int examine_subtree_coloring(co2_cloud_irn_t *ci, col_t col)
725 int *front = FRONT_BASE(ci, col);
729 for(i = 0; i < ci->mst_n_childs; ++i) {
730 co2_cloud_irn_t *chld = ci->mst_childs[i];
731 col_t chld_col = front[i];
733 cost += examine_subtree_coloring(chld, chld_col);
734 cost += col != chld_col ? chld->mst_costs : 0;
740 static int cloud_mst_build_colorings(co2_cloud_irn_t *ci, int depth)
742 co2_t *env = ci->cloud->env;
743 int n_regs = env->n_regs;
744 col_cost_pair_t *seq = alloca(n_regs * sizeof(seq[0]));
745 int *front = alloca(ci->mst_n_childs * sizeof(front[0]));
747 int best_cost = INT_MAX;
752 DBG((env->dbg, LEVEL_2, "\t%2{firm:indent}build colorings: %+F\n", depth, ci->inh.irn));
754 for (i = 0; i < ci->mst_n_childs; ++i)
755 cloud_mst_build_colorings(ci->mst_childs[i], depth + 1);
757 for (i = 0; i < n_regs; ++i)
758 ci->col_costs[i] = INT_MAX;
760 /* Sort the children according to the cost of the affinity edge they have to the current node. */
761 // qsort(child, ci->mst_n_childs, sizeof(childs[0]), cmp_parent);
763 determine_start_colors(ci, seq);
764 // qsort(seq, n_regs, sizeof(seq[0]), col_cost_pair_lt);
766 for(i = 0; i < n_regs; ++i) {
767 col_t col = seq[i].col;
768 int costs = seq[i].costs;
771 if(INFEASIBLE(costs))
775 Judge, if it is worthwhile trying this color.
776 If another color was so good that we cannot get any better, bail out here.
780 DBG((env->dbg, LEVEL_2, "\t%2{firm:indent}%+F trying color %d\n", depth, ci->inh.irn, col));
782 /* This sorts the tmp_coloring array in the children according to the costs of the current color. */
783 fill_tmp_coloring(ci, col);
785 /* Initialize the front. It gives the indexes into the color tmp_coloring array. */
786 memset(front, 0, ci->mst_n_childs * sizeof(front));
789 As long as we have color configurations to try.
790 We try the best ones first and get worse over and over.
795 if (try_coloring(ci, col, front, &try_push, depth + 1)) {
796 int *res_front = FRONT_BASE(ci, col);
799 for(j = 0; j < ci->mst_n_childs; ++j) {
800 co2_cloud_irn_t *child = ci->mst_childs[j];
801 col_t col = child->tmp_coloring[front[j]].col;
805 costs = examine_subtree_coloring(ci, col);
806 ci->col_costs[col] = costs;
809 /* Set the current best color. */
810 if(costs < best_cost) {
816 DBG((env->dbg, LEVEL_2, "\t%2{firm:indent}-> %s\n", depth, done ? "ok" : "failed"));
818 /* Worsen the configuration, if that one didn't succeed. */
820 done = try_push ? push_front(ci, front) < 0 : 1;
824 DBG((env->dbg, LEVEL_2, "%2{firm:indent} %+F\n", depth, ci->inh.irn));
825 for(i = 0; i < n_regs; ++i)
826 DBG((env->dbg, LEVEL_2, "%2{firm:indent} color %d costs %d\n", depth, i, ci->col_costs[i]));
831 static void node_color_badness(co2_cloud_irn_t *ci, int *badness)
833 co2_t *env = ci->cloud->env;
834 co2_irn_t *ir = &ci->inh;
835 int n_regs = env->n_regs;
836 be_ifg_t *ifg = env->co->cenv->ifg;
837 bitset_t *bs = bitset_alloca(n_regs);
843 admissible_colors(env, &ci->inh, bs);
845 bitset_foreach(bs, elm)
846 badness[elm] = ci->costs;
848 /* Use constrained/fixed interfering neighbors to influence the color badness */
849 it = be_ifg_neighbours_iter_alloca(ifg);
850 be_ifg_foreach_neighbour(ifg, it, ir->irn, irn) {
851 co2_irn_t *ni = get_co2_irn(env, irn);
853 admissible_colors(env, ni, bs);
854 if(bitset_popcnt(bs) == 1) {
855 bitset_pos_t c = bitset_next_set(bs, 0);
856 badness[c] += ci->costs;
860 col_t c = get_col(env, ni->irn);
861 badness[c] += ci->costs;
864 be_ifg_neighbours_break(ifg, it);
868 static int cloud_color_badness(co2_cloud_t *cloud)
870 int *badness = alloca(cloud->env->n_regs * sizeof(badness[0]));
873 memset(badness, 0, cloud->env->n_regs * sizeof(badness[0]));
874 for(i = 0; i < cloud->n_memb; ++i)
875 node_color_badness(cloud->seq[i], badness);
878 static void determine_color_badness(co2_cloud_irn_t *ci, int depth)
880 co2_t *env = ci->cloud->env;
883 node_color_badness(ci, ci->color_badness);
885 /* Collect the color badness for the whole subtree */
886 for(i = 0; i < ci->mst_n_childs; ++i) {
887 co2_cloud_irn_t *child = ci->mst_childs[i];
888 determine_color_badness(child, depth + 1);
890 for(j = 0; j < env->n_regs; ++j)
891 ci->color_badness[j] += child->color_badness[j];
894 for(j = 0; j < env->n_regs; ++j)
895 DBG((env->dbg, LEVEL_2, "%2{firm:indent}%+F col %d badness %d\n", depth, ci->inh.irn, j, ci->color_badness[j]));
898 static void unfix_subtree(co2_cloud_irn_t *ci)
903 for(i = 0; i < ci->mst_n_childs; ++i)
904 unfix_subtree(ci->mst_childs[i]);
907 static int coalesce_top_down(co2_cloud_irn_t *ci, int child_nr, int depth)
909 co2_t *env = ci->cloud->env;
910 col_cost_pair_t *seq = alloca(env->n_regs * sizeof(seq[0]));
911 int is_root = ci->mst_parent == ci;
912 col_t parent_col = is_root ? -1 : get_col(env, ci->mst_parent->inh.irn);
913 int min_badness = INT_MAX;
914 int best_col_costs = INT_MAX;
916 int n_regs = env->n_regs;
917 int n_iter = is_root ? MIN(n_regs, subtree_iter) : 1;
919 struct list_head changed;
922 for(i = 0; i < n_regs; ++i) {
923 int badness = ci->color_badness[i];
926 seq[i].costs = is_color_admissible(env, &ci->inh, i) ? badness : INT_MAX;
928 min_badness = MIN(min_badness, badness);
931 /* If we are not the root and the parent's color is allowed for this node give it top prio. */
932 if(!is_root && is_color_admissible(env, &ci->inh, parent_col))
933 seq[parent_col].costs = min_badness - 1;
935 /* Sort the colors. The will be processed in that ordering. */
936 qsort(seq, env->n_regs, sizeof(seq[0]), col_cost_pair_lt);
938 DBG((env->dbg, LEVEL_2, "\t%2{firm:indent}starting top-down coalesce for %+F\n", depth, ci->inh.irn));
939 INIT_LIST_HEAD(&changed);
940 for(i = 0; i < (best_col < 0 ? n_regs : n_iter); ++i) {
941 col_t col = seq[i].col;
942 int costs = seq[i].costs;
943 int add_cost = !is_root && col != parent_col ? ci->mst_costs : 0;
945 int subtree_costs, sum_costs;
947 DBG((env->dbg, LEVEL_2, "\t%2{firm:indent}%+F trying color %d\n", depth, ci->inh.irn, col));
950 INIT_LIST_HEAD(&changed);
951 ok = change_color_single(env, ci->inh.irn, col, &changed, depth);
953 materialize_coloring(&changed);
960 for(j = 0; j < ci->mst_n_childs; ++j) {
961 co2_cloud_irn_t *child = ci->mst_childs[j];
962 ok = coalesce_top_down(child, j, depth + 1) >= 0;
964 child->inh.fixed = 1;
969 /* If the subtree could not be colored, we have to try another color. */
973 subtree_costs = examine_subtree_coloring(ci, col);
974 sum_costs = subtree_costs + add_cost;
975 DBG((env->dbg, LEVEL_2, "\t%2{firm:indent}-> %+F costing %d + %d is ok.\n", depth, ci->inh.irn, subtree_costs, add_cost));
977 if(sum_costs < best_col_costs) {
979 best_col_costs = sum_costs;
980 ci->col_costs[col] = subtree_costs;
986 /* If we are at the root and we achieved an acceptable amount of optimization, we finish. */
988 if(is_root && (ci->cloud->inevit * stop_percentage < ci->cloud->inevit - sum_costs)) {
989 assert(best_col != -1);
996 int *front = FRONT_BASE(ci->mst_parent, parent_col);
997 front[child_nr] = best_col;
1003 static void populate_cloud(co2_t *env, co2_cloud_t *cloud, affinity_node_t *a, int curr_costs)
1005 be_ifg_t *ifg = env->co->cenv->ifg;
1006 co2_cloud_irn_t *ci = get_co2_cloud_irn(env, a->irn);
1013 /* mark the node as visited and add it to the cloud. */
1015 list_add(&ci->cloud_list, &cloud->members_head);
1017 DB((env->dbg, LEVEL_2, "\t%+F\n", ci->inh.irn));
1019 /* determine the nodes costs */
1020 co_gs_foreach_neighb(a, n) {
1022 DB((env->dbg, LEVEL_3, "\t\tneigh %+F cost %d\n", n->irn, n->costs));
1023 if(be_ifg_connected(ifg, a->irn, n->irn))
1024 cloud->inevit += n->costs;
1027 /* add the node's cost to the total costs of the cloud. */
1029 cloud->costs += costs;
1030 cloud->n_constr += is_constrained(env, &ci->inh);
1031 cloud->freedom += bitset_popcnt(get_adm(env, &ci->inh));
1032 cloud->max_degree = MAX(cloud->max_degree, ci->inh.aff->degree);
1035 /* If this is the heaviest node in the cloud, set it as the cloud's master. */
1036 if(costs >= curr_costs) {
1041 /* add all the neighbors of the node to the cloud. */
1042 co_gs_foreach_neighb(a, n) {
1043 affinity_node_t *an = get_affinity_info(env->co, n->irn);
1045 populate_cloud(env, cloud, an, curr_costs);
1049 static co2_cloud_t *new_cloud(co2_t *env, affinity_node_t *a)
1051 co2_cloud_t *cloud = phase_alloc(&env->ph, sizeof(cloud[0]));
1052 co2_cloud_irn_t *ci;
1055 DBG((env->dbg, LEVEL_2, "new cloud with %+F\n", a->irn));
1056 memset(cloud, 0, sizeof(cloud[0]));
1057 INIT_LIST_HEAD(&cloud->members_head);
1058 INIT_LIST_HEAD(&cloud->list);
1059 list_add(&cloud->list, &env->cloud_head);
1060 cloud->best_costs = INT_MAX;
1063 populate_cloud(env, cloud, a, 0);
1064 cloud->freedom = (cloud->n_memb * env->n_regs) / cloud->freedom;
1066 /* Allocate space for the best colors array, where the best coloring is saved. */
1067 // cloud->best_cols = phase_alloc(&env->ph, cloud->n_memb * sizeof(cloud->best_cols[0]));
1069 /* Also allocate space for the node sequence and compute that sequence. */
1070 cloud->seq = phase_alloc(&env->ph, cloud->n_memb * sizeof(cloud->seq[0]));
1073 list_for_each_entry(co2_cloud_irn_t, ci, &cloud->members_head, cloud_list) {
1075 cloud->seq[i++] = ci;
1077 DBG((env->dbg, LEVEL_2, "cloud cost %d, freedom %f\n", cloud->costs, cloud->freedom));
1082 static void apply_coloring(co2_cloud_irn_t *ci, col_t col, int depth)
1084 ir_node *irn = ci->inh.irn;
1085 int *front = FRONT_BASE(ci, col);
1087 struct list_head changed;
1089 INIT_LIST_HEAD(&changed);
1091 DBG((ci->cloud->env->dbg, LEVEL_2, "%2{firm:indent}setting %+F to %d\n", depth, irn, col));
1092 ok = change_color_single(ci->cloud->env, irn, col, &changed, depth);
1093 assert(ok && "Color changing may not fail while committing the coloring");
1094 materialize_coloring(&changed);
1096 for(i = 0; i < ci->mst_n_childs; ++i) {
1097 apply_coloring(ci->mst_childs[i], front[i], depth + 1);
1101 static void process_cloud(co2_cloud_t *cloud)
1103 co2_t *env = cloud->env;
1104 int n_regs = env->n_regs;
1106 int *mst_edges = malloc(cloud->n_memb * cloud->n_memb * sizeof(mst_edges[0]));
1109 struct list_head changed;
1114 memset(mst_edges, 0, cloud->n_memb * cloud->n_memb * sizeof(mst_edges[0]));
1116 /* Collect all edges in the cloud on an obstack and sort the increasingly */
1117 obstack_init(&cloud->obst);
1118 for(i = 0; i < cloud->n_memb; ++i) {
1119 co2_cloud_irn_t *ci = cloud->seq[i];
1122 co_gs_foreach_neighb(ci->inh.aff, n) {
1123 co2_cloud_irn_t *ni = get_co2_cloud_irn(cloud->env, n->irn);
1124 if(ci->index < ni->index) {
1129 obstack_grow(&cloud->obst, &e, sizeof(e));
1134 edges = obstack_finish(&cloud->obst);
1135 qsort(edges, n_edges, sizeof(edges[0]), cmp_edges);
1137 /* Compute the maximum spanning tree using Kruskal/Union-Find */
1138 DBG((env->dbg, LEVEL_2, "computing spanning tree of cloud with master %+F\n", cloud->master->inh.irn));
1139 for(i = 0; i < n_edges; ++i) {
1140 edge_t *e = &edges[i];
1141 co2_cloud_irn_t *rs = find_mst_root(e->src);
1142 co2_cloud_irn_t *rt = find_mst_root(e->tgt);
1144 /* if the union/find roots are different */
1146 int si = e->src->index;
1147 int ti = e->tgt->index;
1149 /* unify the sets */
1150 rs->mst_parent = rt;
1151 DBG((env->dbg, LEVEL_2, "\tadding edge %+F -- %+F cost %d\n", rs->inh.irn, rt->inh.irn, e->costs));
1153 /* this edge is in the MST, so set it in the bitset. */
1154 mst_edges[si * cloud->n_memb + ti] = e->costs;
1155 mst_edges[ti * cloud->n_memb + si] = e->costs;
1158 obstack_free(&cloud->obst, edges);
1160 cloud->master->mst_parent = cloud->master;
1161 cloud->mst_root = cloud->master;
1162 q = new_pdeq1(cloud->master);
1163 while(!pdeq_empty(q)) {
1164 co2_cloud_irn_t *ci = pdeq_getl(q);
1165 int ofs = ci->index * cloud->n_memb;
1166 int end = ofs + cloud->n_memb;
1169 ci->mst_n_childs = 0;
1170 for(i = ofs; i < end; ++i) {
1171 if(mst_edges[i] != 0) {
1172 int other = i - ofs;
1173 co2_cloud_irn_t *child = cloud->seq[i - ofs];
1175 /* put the child to the worklist */
1176 pdeq_putr(q, child);
1178 /* make ci the parent of the child and add the child to the children array of the parent */
1179 child->mst_parent = ci;
1180 child->mst_costs = mst_edges[i];
1182 obstack_ptr_grow(&cloud->obst, child);
1184 mst_edges[other * cloud->n_memb + ci->index] = 0;
1189 obstack_ptr_grow(&cloud->obst, NULL);
1190 ci->mst_childs = obstack_finish(&cloud->obst);
1196 DBG((env->dbg, LEVEL_3, "mst:\n"));
1197 for(i = 0; i < cloud->n_memb; ++i) {
1198 co2_cloud_irn_t *ci = cloud->seq[i];
1199 DBG((env->dbg, LEVEL_3, "\t%+F -> %+F\n", ci->inh.irn, ci->mst_parent->inh.irn));
1202 for(i = 0; i < cloud->n_memb; ++i) {
1203 co2_cloud_irn_t *ci = cloud->seq[i];
1204 int n_childs = ci->mst_n_childs;
1207 ci->col_costs = obstack_alloc(&cloud->obst, n_regs * sizeof(ci->col_costs[0]));
1208 ci->tmp_coloring = obstack_alloc(&cloud->obst, n_regs * sizeof(ci->tmp_coloring[0]));
1209 ci->fronts = obstack_alloc(&cloud->obst, n_regs * n_childs * sizeof(ci->fronts[0]));
1210 ci->color_badness = obstack_alloc(&cloud->obst, n_regs * sizeof(ci->fronts[0]));
1211 memset(ci->color_badness, 0, n_regs * sizeof(ci->color_badness[0]));
1212 memset(ci->col_costs, 0, n_regs * sizeof(ci->col_costs[0]));
1213 memset(ci->tmp_coloring, 0, n_regs * sizeof(ci->tmp_coloring[0]));
1214 memset(ci->fronts, 0, n_regs * n_childs * sizeof(ci->fronts[0]));
1216 for(j = 0; j < env->n_regs; j++)
1217 ci->col_costs[j] = INT_MAX;
1221 determine_color_badness(cloud->mst_root, 0);
1222 best_col = coalesce_top_down(cloud->mst_root, -1, 0);
1223 unfix_subtree(cloud->mst_root);
1224 apply_coloring(cloud->mst_root, best_col, 0);
1226 /* The coloring should represent the one with the best costs. */
1227 //materialize_coloring(&changed);
1228 DBG((env->dbg, LEVEL_2, "\tbest coloring for root %+F was %d costing %d\n",
1229 cloud->mst_root->inh.irn, best_col, examine_subtree_coloring(cloud->mst_root, best_col)));
1231 /* Fix all nodes in the cloud. */
1232 for(i = 0; i < cloud->n_memb; ++i)
1233 cloud->seq[i]->inh.fixed = 1;
1235 /* Free all space used while optimizing this cloud. */
1236 obstack_free(&cloud->obst, NULL);
1239 static int cloud_costs(co2_cloud_t *cloud)
1244 for(i = 0; i < cloud->n_memb; ++i) {
1245 co2_irn_t *ci = (co2_irn_t *) cloud->seq[i];
1246 col_t col = get_col(cloud->env, ci->irn);
1247 co_gs_foreach_neighb(ci->aff, n) {
1248 col_t n_col = get_col(cloud->env, n->irn);
1249 costs += col != n_col ? n->costs : 0;
1256 static void process(co2_t *env)
1260 co2_cloud_t **clouds;
1265 int final_costs = 0;
1268 co_gs_foreach_aff_node(env->co, a) {
1269 co2_cloud_irn_t *ci = get_co2_cloud_irn(env, a->irn);
1272 co2_cloud_t *cloud = new_cloud(env, a);
1278 clouds = xmalloc(n_clouds * sizeof(clouds[0]));
1279 list_for_each_entry(co2_cloud_t, pos, &env->cloud_head, list)
1281 qsort(clouds, n_clouds, sizeof(clouds[0]), cmp_clouds_gt);
1283 for(i = 0; i < n_clouds; ++i) {
1284 init_costs += cloud_costs(clouds[i]);
1285 process_cloud(clouds[i]);
1286 all_costs += clouds[i]->costs;
1287 final_costs += cloud_costs(clouds[i]);
1289 /* Dump the IFG if the user demanded it. */
1290 if (dump_flags & DUMP_CLOUD) {
1294 ir_snprintf(buf, sizeof(buf), "ifg_%F_%s_cloud_%d.dot", env->co->irg, env->co->cls->name, i);
1295 if(f = fopen(buf, "wt")) {
1296 be_ifg_dump_dot(env->co->cenv->ifg, env->co->irg, f, &ifg_dot_cb, env);
1302 DB((env->dbg, LEVEL_1, "all costs: %d, init costs: %d, final costs: %d\n", all_costs, init_costs, final_costs));
1307 static void writeback_colors(co2_t *env)
1309 const arch_env_t *aenv = env->co->aenv;
1312 for(irn = env->touched; irn; irn = irn->touched_next) {
1313 const arch_register_t *reg = arch_register_for_index(env->co->cls, irn->orig_col);
1314 arch_set_irn_register(aenv, irn->irn, reg);
1320 ___ _____ ____ ____ ___ _____ ____ _
1321 |_ _| ___/ ___| | _ \ / _ \_ _| | _ \ _ _ _ __ ___ _ __ (_)_ __ __ _
1322 | || |_ | | _ | | | | | | || | | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1323 | || _|| |_| | | |_| | |_| || | | |_| | |_| | | | | | | |_) | | | | | (_| |
1324 |___|_| \____| |____/ \___/ |_| |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1328 static const char *get_dot_color_name(int col)
1330 static const char *names[] = {
1364 return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1367 static const char *get_dot_shape_name(co2_t *env, co2_irn_t *ci)
1369 arch_register_req_t req;
1371 arch_get_register_req(env->co->aenv, &req, ci->irn, BE_OUT_POS(0));
1372 if(arch_register_req_is(&req, limited))
1384 static void ifg_dump_graph_attr(FILE *f, void *self)
1386 fprintf(f, "overlay=false");
1389 static int ifg_is_dump_node(void *self, ir_node *irn)
1392 return !arch_irn_is(env->co->aenv, irn, ignore);
1395 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1398 co2_irn_t *ci = get_co2_irn(env, irn);
1404 co2_cloud_irn_t *cci = (void *) ci;
1405 if (cci->cloud && cci->cloud->mst_root == cci)
1408 if(cci->cloud && cci->cloud->mst_root)
1409 snprintf(buf, sizeof(buf), "%+F", cci->cloud->mst_root->inh.irn);
1412 ir_fprintf(f, "label=\"%+F%s\" style=filled peripheries=%d color=%s shape=%s", irn, buf, peri,
1413 get_dot_color_name(get_col(env, irn)), get_dot_shape_name(env, ci));
1416 static void ifg_dump_at_end(FILE *file, void *self)
1424 list_for_each_entry(co2_cloud_t, pos, &env->cloud_head, list) {
1427 for(i = 0; i < pos->n_memb - 1; ++i) {
1428 fprintf(file, "\tn%d -- n%d [style=dotted color=green];\n", get_irn_idx(pos->seq[i]->inh.irn), get_irn_idx(pos->seq[i+1]->inh.irn));
1434 co_gs_foreach_aff_node(env->co, a) {
1435 co2_cloud_irn_t *ai = get_co2_cloud_irn(env, a->irn);
1436 int idx = get_irn_idx(a->irn);
1439 if(ai->mst_parent != ai)
1440 fprintf(file, "\tn%d -- n%d [style=dotted color=blue arrowhead=normal];\n", idx, get_irn_idx(ai->mst_parent->inh.irn));
1442 co_gs_foreach_neighb(a, n) {
1443 int nidx = get_irn_idx(n->irn);
1444 co2_cloud_irn_t *ci = get_co2_cloud_irn(env, n->irn);
1447 const char *color = get_col(env, a->irn) == get_col(env, n->irn) ? "black" : "red";
1448 const char *arr = "arrowhead=dot arrowtail=dot";
1450 if(ci->mst_parent == ai)
1451 arr = "arrowtail=normal";
1452 else if(ai->mst_parent == ci)
1453 arr = "arrowhead=normal";
1455 fprintf(file, "\tn%d -- n%d [label=\"%d\" %s style=dashed color=%s weight=0.01];\n", idx, nidx, n->costs, arr, color);
1462 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1464 ifg_dump_graph_attr,
1472 void co_solve_heuristic_new(copy_opt_t *co)
1478 phase_init(&env.ph, "co2", co->cenv->birg->irg, PHASE_DEFAULT_GROWTH, co2_irn_init);
1482 env.n_regs = co->cls->n_regs;
1483 env.ignore_regs = bitset_alloca(co->cls->n_regs);
1484 arch_put_non_ignore_regs(co->aenv, co->cls, env.ignore_regs);
1485 bitset_flip_all(env.ignore_regs);
1486 be_abi_put_ignore_regs(co->cenv->birg->abi, co->cls, env.ignore_regs);
1487 FIRM_DBG_REGISTER(env.dbg, "firm.be.co2");
1488 INIT_LIST_HEAD(&env.cloud_head);
1490 if(dump_flags & DUMP_BEFORE) {
1491 ir_snprintf(buf, sizeof(buf), "ifg_%F_%s_before.dot", co->irg, co->cls->name);
1492 if(f = fopen(buf, "wt")) {
1493 be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &env);
1500 if(dump_flags & DUMP_AFTER) {
1501 ir_snprintf(buf, sizeof(buf), "ifg_%F_%s_after.dot", co->irg, co->cls->name);
1502 if(f = fopen(buf, "wt")) {
1503 be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &env);
1508 writeback_colors(&env);
1509 phase_free(&env.ph);