2 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief PBQP based register allocation.
23 * @author Thomas Bersch
25 * @version $Id: bechordal.c 26750 2009-11-27 09:37:43Z bersch $
28 /* miscellaneous includes */
37 #include "iredges_t.h"
42 /* libfirm/ir/be includes */
47 #include "bechordal_common.h"
48 #include "bechordal.h"
49 #include "bechordal_t.h"
57 #include "becopyopt.h"
64 #include "heuristical_co.h"
65 #include "heuristical_co_ld.h"
67 #include "html_dumper.h"
68 #include "pbqp_node_t.h"
69 #include "pbqp_node.h"
/* Tunables of this module; both are registered in the options[] table of this file. */
/* use_exec_freq: when cleared, be_pbqp_coloring() prepares a constant-cost affinity matrix template. */
75 static int use_exec_freq = true;
/* use_late_decision: tested in be_pbqp_coloring() in front of the call to solve_pbqp_heuristical_co_ld(). */
76 static int use_late_decision = true;
/**
 * State shared by the PBQP allocation helpers of this file while one
 * register class of one graph is processed.
 *
 * NOTE(review): code in this file also reads members named lv and rpeo
 * through this type; their declarations are not among the lines shown here
 * (the embedded listing numbers skip 83 and 87) - confirm against the full
 * source.
 */
78 typedef struct _be_pbqp_alloc_env_t {
79 pbqp *pbqp_inst; /**< PBQP instance for register allocation */
80 be_irg_t *birg; /**< Back-end IRG session. */
81 ir_graph *irg; /**< The graph under examination. */
82 const arch_register_class_t *cls; /**< Current processed register class */
84 bitset_t *ignored_regs; /**< Filled by be_put_ignore_regs(); set registers get INF_COSTS in each node cost vector */
85 pbqp_matrix *ife_matrix_template; /**< Interference matrix template; be_pbqp_coloring() puts INF_COSTS on its diagonal */
86 pbqp_matrix *aff_matrix_template; /**< Affinity matrix template; be_pbqp_coloring() only assigns it when use_exec_freq is off */
88 unsigned *restr_nodes; /**< Indexed by node index; holds the count stored by create_pbqp_node() */
89 unsigned *ife_edge_num; /**< Indexed by node index; incremented by insert_ife_edge() for both end points */
90 be_chordal_env_t *env; /**< Chordal environment handed to be_pbqp_coloring() */
91 } be_pbqp_alloc_env_t;
/* True for a Phi node whose mode is a data mode. */
94 #define is_Reg_Phi(irn) (is_Phi(irn) && mode_is_data(get_irn_mode(irn)))
/* For a Proj irn: the operand of the Proj predecessor at the position given by the Proj number. */
95 #define get_Perm_src(irn) (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn)))
/* True for a Proj whose predecessor satisfies be_is_Perm(). */
96 #define is_Perm_Proj(irn) (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn)))
/* Adds edge costs between the PBQP nodes addressed by the node indices of src_node and trg_node;
 * the edge receives its own copy of template_matrix (pbqp_matrix_copy). */
97 #define insert_edge(pbqp, src_node, trg_node, template_matrix) (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix)))
/* Number of registers of cls minus the restr_nodes entry of irn.
 * NOTE(review): the arguments are not parenthesised in the expansion, so only pass plain identifiers. */
98 #define get_free_regs(restr_nodes, cls, irn) (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)])
/**
 * Tests whether a register requirement carries the should_be_same flag.
 *
 * @param req  register requirement to inspect
 * @return non-zero if arch_register_req_type_should_be_same is set in req->type, 0 otherwise
 */
100 static inline int is_2addr_code(const arch_register_req_t *req)
102 return (req->type & arch_register_req_type_should_be_same) != 0;
/* Option table of this module; be_init_pbqp_coloring() hands it to lc_opt_add_table().
 * Each entry binds an option name and help text to one of the static flags above.
 * NOTE(review): the table terminator is not among the lines shown here - confirm against the full source. */
105 static const lc_opt_table_entry_t options[] = {
106 LC_OPT_ENT_BOOL ("exec_freq", "use exec_freq", &use_exec_freq),
107 LC_OPT_ENT_BOOL ("late_decision", "use late decision for register allocation", &use_late_decision),
/**
 * Opens an output file whose name is assembled from the compilation unit,
 * the graph and the register class found in env.
 *
 * The name is formatted with "%s%s_%F_%s%s" from prefix, a private copy of
 * env->birg->main_env->cup_name, env->irg, env->cls->name and suffix. The
 * copy of the unit name is scanned for '.' characters before formatting.
 * The file is opened with mode "wt"; a failing fopen() ends in panic().
 *
 * NOTE(review): the local declarations, the statement guarded by the '.'
 * test, the release of tu_name and the return statement are not among the
 * lines shown here - confirm against the full source.
 *
 * @param env     chordal environment supplying birg, irg and cls
 * @param prefix  text placed in front of the generated name
 * @param suffix  text appended to the generated name
 */
112 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
/* work on a heap copy so the original unit name stays untouched */
119 n = strlen(env->birg->main_env->cup_name);
120 tu_name = XMALLOCN(char, n + 1);
121 strcpy(tu_name, env->birg->main_env->cup_name);
122 for (i = 0; i < n; ++i)
123 if (tu_name[i] == '.')
126 ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
128 result = fopen(buf, "wt");
129 if (result == NULL) {
130 panic("Couldn't open '%s' for writing.", buf);
/**
 * Creates the PBQP node for irn and records how restricted it is.
 *
 * A cost vector with one entry per register of the current class is
 * allocated. Every register that is set in ignored_regs, or that is not
 * allocatable for the output of irn, gets INF_COSTS. The vector is then
 * attached to the PBQP instance under the node index of irn, and
 * cntConstrains is stored in restr_nodes at the same index.
 *
 * NOTE(review): the declaration of idx and the statement that increments
 * cntConstrains are not among the lines shown here - presumably the counter
 * is bumped once per register set to INF_COSTS; confirm against the full
 * source.
 *
 * @param pbqp_alloc_env  allocation state (class, PBQP instance, ignored registers, restr_nodes)
 * @param irn             node to create the PBQP node for
 */
138 static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn)
140 const arch_register_class_t *cls = pbqp_alloc_env->cls;
141 pbqp *pbqp_inst = pbqp_alloc_env->pbqp_inst;
142 bitset_t *ignored_regs = pbqp_alloc_env->ignored_regs;
143 unsigned colors_n = arch_register_class_n_regs(cls);
144 unsigned cntConstrains = 0;
146 /* create costs vector depending on register constraints */
147 struct vector *costs_vector = vector_alloc(pbqp_inst, colors_n);
149 /* set costs depending on register constraints */
151 for (idx = 0; idx < colors_n; idx++) {
152 if (bitset_is_set(ignored_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) {
153 vector_set(costs_vector, idx, INF_COSTS);
158 /* add vector to pbqp node */
159 add_node_costs(pbqp_inst, get_irn_idx(irn), costs_vector);
160 pbqp_alloc_env->restr_nodes[get_irn_idx(irn)] = cntConstrains;
/**
 * Records an interference between src_node and trg_node in the PBQP instance.
 *
 * Work is only done when get_edge() finds no edge for (src, trg). Then the
 * interference counters of both nodes are incremented and two shortcuts are
 * tried before an edge is created:
 *  - both nodes have exactly one free register: only an assertion checks
 *    that their minimum-cost register indices differ;
 *  - exactly one node has one free register: the register index with
 *    minimum cost of that node is set to INF_COSTS in the cost vector of
 *    the other node.
 * The last visible statement inserts an edge carrying a copy of the
 * interference matrix template.
 *
 * NOTE(review): the statements that end the shortcut branches (and the
 * else between the two inner cases) are not among the lines shown here -
 * presumably each shortcut returns without inserting an edge; confirm
 * against the full source.
 * NOTE(review): get_edge() is queried for (src, trg) only, while
 * create_pbqp_coloring_instance() checks both orders - confirm whether the
 * lookup is direction sensitive.
 *
 * @param pbqp_alloc_env  allocation state
 * @param src_node        first interfering node
 * @param trg_node        second interfering node
 */
163 static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node)
165 pbqp *pbqp = pbqp_alloc_env->pbqp_inst;
166 const arch_register_class_t *cls = pbqp_alloc_env->cls;
167 pbqp_matrix *ife_matrix_template = pbqp_alloc_env->ife_matrix_template;
168 unsigned *restr_nodes = pbqp_alloc_env->restr_nodes;
170 if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
172 /* increase ife edge counter */
173 pbqp_alloc_env->ife_edge_num[get_irn_idx(src_node)]++;
174 pbqp_alloc_env->ife_edge_num[get_irn_idx(trg_node)]++;
176 /* do useful optimization to speed up pbqp solving (we can do this because we know our matrix) */
177 if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
178 unsigned src_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
179 unsigned trg_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
180 assert(src_idx != trg_idx && "Interfering nodes could not have the same register!");
183 if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
184 if (get_free_regs(restr_nodes, cls, src_node) == 1) {
/* src is fixed to one register: forbid that register for trg */
185 unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
186 vector_set(get_node(pbqp, get_irn_idx(trg_node))->costs, idx, INF_COSTS);
/* trg is fixed to one register: forbid that register for src */
189 unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
190 vector_set(get_node(pbqp, get_irn_idx(src_node))->costs, idx, INF_COSTS);
195 /* insert interference edge */
196 insert_edge(pbqp, src_node, trg_node, ife_matrix_template);
/**
 * Records an affinity between src_node and trg_node in the PBQP instance.
 *
 * Work is only done when get_edge() finds no edge for (src, trg). The
 * visible lines show two ways to obtain the cost matrix: filling the freshly
 * allocated matrix with the execution frequency of the copy block (for a Phi
 * src_node this is the control-flow predecessor block at pos, otherwise the
 * block of src_node), or taking aff_matrix_template. If exactly one node
 * has a single free register, the matrix column of that register is added
 * to the cost vector of the other node. The last visible statement inserts
 * an edge carrying a copy of the matrix.
 *
 * NOTE(review): the condition selecting between the two matrix sources, the
 * guard around pbqp_matrix_set() and the statements ending the shortcut
 * branches are not among the lines shown here - presumably use_exec_freq
 * selects the source, only off-diagonal cells are written, and the
 * shortcuts return without inserting an edge; confirm against the full
 * source.
 * NOTE(review): afe_matrix is allocated before it is known to be needed; it
 * goes unused when an edge already exists or when the template is taken
 * instead. insert_edge() copies the matrix again, so moving the allocation
 * into the branch that fills it looks safe - confirm.
 * NOTE(review): the function name reads like a typo for insert_afe_edge.
 *
 * @param pbqp_alloc_env  allocation state
 * @param src_node        node whose block determines the copy block
 * @param trg_node        node that should share a register with src_node
 * @param pos             predecessor position, used only when src_node is a Phi
 */
200 static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos)
202 pbqp *pbqp = pbqp_alloc_env->pbqp_inst;
203 const arch_register_class_t *cls = pbqp_alloc_env->cls;
204 unsigned *restr_nodes = pbqp_alloc_env->restr_nodes;
205 pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls));
206 unsigned colors_n = arch_register_class_n_regs(cls);
208 if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
210 /* get exec_freq for copy_block */
211 ir_node *root_bl = get_nodes_block(src_node);
212 ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
213 unsigned long res = get_block_execfreq_ulong(pbqp_alloc_env->birg->exec_freq, copy_bl);
215 /* create afe-matrix */
217 for (row = 0; row < colors_n; row++) {
218 for (col = 0; col < colors_n; col++) {
220 pbqp_matrix_set(afe_matrix, row, col, (num)res);
225 afe_matrix = pbqp_alloc_env->aff_matrix_template;
228 /* do useful optimization to speed up pbqp solving */
229 if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
232 if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
233 if (get_free_regs(restr_nodes, cls, src_node) == 1) {
/* src is fixed: fold the matrix column of its register into the costs of trg */
234 unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
235 vector_add_matrix_col(get_node(pbqp, get_irn_idx(trg_node))->costs, afe_matrix, regIdx);
/* trg is fixed: fold the matrix column of its register into the costs of src */
238 unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
239 vector_add_matrix_col(get_node(pbqp, get_irn_idx(src_node))->costs, afe_matrix, regIdx);
244 /* insert affinity edge */
245 insert_edge(pbqp, src_node, trg_node, afe_matrix);
/**
 * Creates the affinity edges that originate at irn.
 *
 * Three kinds of nodes are handled:
 *  - register Phis: one affinity per operand, with the operand position
 *    passed on as pos;
 *  - Projs of a Perm: one affinity to the matching Perm operand (pos -1);
 *  - everything else: if the output requirement has the should_be_same
 *    flag, one affinity per operand whose bit is set in req->other_same.
 * Operands that are not considered for allocation in the current class are
 * tested with arch_irn_consider_in_reg_alloc().
 *
 * NOTE(review): the declarations of pos, max and i, the statements guarded
 * by the consider_in_reg_alloc tests and the self-edge checks announced by
 * the "no edges to itself" comments are not among the lines shown here -
 * presumably such operands are skipped; confirm against the full source.
 * NOTE(review): inside the bit loop a local ir_node pointer named other
 * shadows the unsigned bit mask of the same name; the loop condition still
 * refers to the mask, but a rename would make this far easier to read.
 * NOTE(review): if bit 31 of the mask is set, the loop condition eventually
 * evaluates 1U << 32, which is undefined for a 32-bit unsigned - confirm the
 * mask can never be that wide.
 *
 * @param irn  node to create affinity edges for
 * @param env  the be_pbqp_alloc_env_t of the current run
 */
249 static void create_affinity_edges(ir_node *irn, void *env)
251 be_pbqp_alloc_env_t *pbqp_alloc_env = env;
252 const arch_register_class_t *cls = pbqp_alloc_env->cls;
253 const arch_register_req_t *req = arch_get_register_req_out(irn);
256 if (is_Reg_Phi(irn)) { /* Phis */
257 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
258 ir_node *arg = get_irn_n(irn, pos);
260 if (!arch_irn_consider_in_reg_alloc(cls, arg))
263 /* no edges to itself */
268 inser_afe_edge(pbqp_alloc_env, irn, arg, pos);
271 else if (is_Perm_Proj(irn)) { /* Perms */
272 ir_node *arg = get_Perm_src(irn);
273 if (!arch_irn_consider_in_reg_alloc(cls, arg))
276 inser_afe_edge(pbqp_alloc_env, irn, arg, -1);
278 else { /* 2-address code */
279 if (is_2addr_code(req)) {
280 const unsigned other = req->other_same;
/* visit every operand position whose bit is set in the mask */
283 for (i = 0; 1U << i <= other; ++i) {
284 if (other & (1U << i)) {
285 ir_node *other = get_irn_n(skip_Proj(irn), i);
286 if (!arch_irn_consider_in_reg_alloc(cls, other))
289 /* no edges to itself */
294 inser_afe_edge(pbqp_alloc_env, irn, other, i);
/**
 * Block walker: builds the PBQP nodes and interference edges of one block
 * and appends the block's nodes to the reverse perfect elimination order.
 *
 * The schedule of the block is walked backwards, starting with the set of
 * values live at the end of the block. For each scheduled node (or, for a
 * mode_T node, each of its out-edge users) that takes part in allocation, a
 * PBQP node is created on demand and interference edges to the currently
 * live values are requested via insert_ife_edge(). After that the live set
 * is updated with be_liveness_transfer().
 *
 * Ordering: Projs of a mode_T node are collected in two priority queues
 * keyed by their interference edge count - one for nodes with at most 4
 * free registers, one for the rest - and are then moved through sorted_list
 * into temp_list. Other allocatable nodes are pushed to the front of
 * temp_list directly. At the end temp_list is appended to rpeo.
 *
 * NOTE(review): many short statements (declarations of irn, live and el,
 * continue statements, else branches, the assignment of last_element, the
 * self-edge checks) are not among the lines shown here, so the exact
 * control flow cannot be read off this listing - confirm against the full
 * source before changing the ordering logic.
 * NOTE(review): the threshold 4 is an unnamed constant used in two places.
 * NOTE(review): node indices are read both through get_irn_idx() and
 * directly as ->node_idx; one access style would be more consistent.
 *
 * @param block  block to process
 * @param data   the be_pbqp_alloc_env_t of the current run
 */
301 static void create_pbqp_coloring_instance(ir_node *block, void *data)
303 be_pbqp_alloc_env_t *pbqp_alloc_env = data;
304 be_lv_t *lv = pbqp_alloc_env->lv;
305 const arch_register_class_t *cls = pbqp_alloc_env->cls;
306 plist_t *rpeo = pbqp_alloc_env->rpeo;
307 pbqp *pbqp_inst = pbqp_alloc_env->pbqp_inst;
308 unsigned *restr_nodes = pbqp_alloc_env->restr_nodes;
309 pqueue_t *queue = new_pqueue();
310 pqueue_t *restr_nodes_queue = new_pqueue();
311 plist_t *temp_list = plist_new();
312 plist_t *sorted_list = plist_new();
314 ir_nodeset_t live_nodes;
316 ir_node *last_element = NULL;
318 /* first, determine the pressure */
319 /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */
320 create_borders(block, pbqp_alloc_env->env);
322 /* calculate living nodes for the first step */
323 ir_nodeset_init(&live_nodes);
324 be_liveness_end_of_block(lv, cls, block, &live_nodes);
326 /* create pbqp nodes, interference edges and reverse perfect elimination order */
327 sched_foreach_reverse(block, irn) {
329 ir_nodeset_iterator_t iter;
/* mode_T node: the allocatable values are its users reached through the out edges */
331 if (get_irn_mode(irn) == mode_T) {
332 const ir_edge_t *edge;
333 foreach_out_edge(irn, edge) {
334 ir_node *proj = get_edge_src_irn(edge);
335 if (!arch_irn_consider_in_reg_alloc(cls, proj))
338 /* create pbqp source node if it doesn't exist */
339 if (get_node(pbqp_inst, get_irn_idx(proj)) == NULL) {
340 create_pbqp_node(pbqp_alloc_env, proj);
343 /* create nodes and interference edges */
344 foreach_ir_nodeset(&live_nodes, live, iter) {
345 /* create pbqp source node if it doesn't exist */
346 if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
347 create_pbqp_node(pbqp_alloc_env, live);
350 /* no edges to itself */
355 insert_ife_edge(pbqp_alloc_env, proj, live);
360 if (arch_irn_consider_in_reg_alloc(cls, irn)) {
361 /* create pbqp source node if it doesn't exist */
362 if (get_node(pbqp_inst, get_irn_idx(irn)) == NULL) {
363 create_pbqp_node(pbqp_alloc_env, irn);
366 /* create nodes and interference edges */
367 foreach_ir_nodeset(&live_nodes, live, iter) {
368 /* create pbqp source node if it doesn't exist */
369 if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
370 create_pbqp_node(pbqp_alloc_env, live);
373 /* no edges to itself */
378 /* insert interference edge */
379 insert_ife_edge(pbqp_alloc_env, irn, live);
384 /* get living nodes for next step */
386 be_liveness_transfer(cls, irn, &live_nodes);
389 /* order nodes for perfect elimination order */
390 if (get_irn_mode(irn) == mode_T) {
391 bool allHaveIFEdges = true;
393 const ir_edge_t *edge;
394 foreach_out_edge(irn, edge) {
395 ir_node *proj = get_edge_src_irn(edge);
396 if (!arch_irn_consider_in_reg_alloc(cls, proj))
399 /* insert proj node into priority queue (descending by the number of interference edges) */
400 if (get_free_regs(restr_nodes, cls, proj) <= 4) {
401 pqueue_put(restr_nodes_queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
404 pqueue_put(queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
407 /* skip last step if there is no last_element */
408 if(last_element == NULL)
411 /* check if proj has an if edge to last_element (at this time pbqp contains only if edges) */
412 if(get_edge(pbqp_inst, proj->node_idx, last_element->node_idx) == NULL && get_edge(pbqp_inst, last_element->node_idx, proj->node_idx) == NULL) {
413 allHaveIFEdges = false; /* there is no if edge between proj and last_element */
/* last_element interferes with every Proj: queue it with them and take it out of temp_list */
417 if(last_element != NULL && allHaveIFEdges) {
418 if (get_free_regs(restr_nodes, cls, last_element) <= 4) {
419 pqueue_put(restr_nodes_queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
422 pqueue_put(queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
424 plist_erase(temp_list, plist_find_value(temp_list, get_node(pbqp_inst, last_element->node_idx)));
428 /* first insert all restricted proj nodes */
429 while (!pqueue_empty(restr_nodes_queue)) {
430 plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
433 /* insert proj nodes descending by their number of interference edges */
434 while (!pqueue_empty(queue)) {
435 plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
438 /* invert sorted list */
439 foreach_plist(sorted_list, el) {
440 plist_insert_front(temp_list, el->data);
443 plist_clear(sorted_list);
447 if (arch_irn_consider_in_reg_alloc(cls, irn)) {
448 // remember last colorable node
450 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
453 // node not colorable, so ignore it
459 /* insert nodes into reverse perfect elimination order */
460 foreach_plist(temp_list, el) {
461 plist_insert_back(rpeo, el->data);
464 /* free reserved memory */
465 ir_nodeset_destroy(&live_nodes);
466 plist_free(temp_list);
467 plist_free(sorted_list);
469 del_pqueue(restr_nodes_queue);
/**
 * Block walker: scans the scheduled instructions of block and hands them to
 * pre_process_constraints().
 *
 * The schedule is walked from sched_first() until sched_is_end(). Each
 * position is scanned with chordal_scan_insn(), and the walk continues at
 * insn->next_insn. A silent flag starts out set only for the start block of
 * the graph and is toggled at every Barrier node.
 *
 * NOTE(review): the declaration of irn, the use of silent_old, and the
 * statement guarded by the has_constraints test are not among the lines
 * shown here - presumably instructions without constraints are skipped;
 * confirm against the full source.
 *
 * @param block  block to process
 * @param data   the be_chordal_env_t of the current run
 */
472 static void insert_perms(ir_node *block, void *data)
475 * Start silent in the start block.
476 * The silence remains until the first barrier is seen.
477 * Each other block is begun loud.
479 be_chordal_env_t *env = data;
481 int silent = block == get_irg_start_block(get_irn_irg(block));
484 * If the block is the start block search the barrier and
485 * start handling constraints from there.
487 for (irn = sched_first(block); !sched_is_end(irn);) {
488 int silent_old = silent; /* store old silent value */
489 if (be_is_Barrier(irn))
490 silent = !silent; /* toggle silent flag */
/* advance before any further processing; the loop header has no increment */
492 be_insn_t *insn = chordal_scan_insn(env, irn);
493 irn = insn->next_insn;
498 if (!insn->has_constraints)
501 pre_process_constraints(env, &insn);
/**
 * Colors the values of one register class by solving a PBQP instance.
 *
 * Visible steps, in order:
 *  1. make liveness information available and walk the dominance tree with
 *     insert_perms();
 *  2. set up the allocation state: a PBQP instance, the ignored register
 *     set, an empty elimination order and two zero-initialised arrays, all
 *     sized by get_irg_last_idx();
 *  3. build the interference matrix template (INF_COSTS on the diagonal)
 *     and, when use_exec_freq is off, an affinity template filled with 2;
 *  4. walk the dominance tree with create_pbqp_coloring_instance() to create
 *     nodes, interference edges and the elimination order;
 *  5. call create_affinity_edges() for every node of that order;
 *  6. run one of the two heuristical solvers on the instance;
 *  7. map the solution of every node to a register of the class and assign
 *     it with arch_set_irn_register();
 *  8. release the allocation state.
 *
 * NOTE(review): preprocessor guards, several declarations (lv, buf, row,
 * col) and the guard around pbqp_matrix_set() in the affinity template are
 * not among the lines shown here - the printf() calls, timers and HTML dump
 * are presumably conditional; confirm against the full source.
 * NOTE(review): the solution is only checked by assert(), so a build with
 * NDEBUG would continue with an infeasible solution.
 * NOTE(review): check that the %d and %lu conversions in the elimination
 * order dump match the types of node->index and node_nr.
 * NOTE(review): no release of the three timers and no fclose() of
 * file_before is visible in this listing.
 *
 * @param env  chordal environment (graph, back-end session, register class, options)
 */
505 static void be_pbqp_coloring(be_chordal_env_t *env)
507 ir_graph *irg = env->irg;
508 be_irg_t *birg = env->birg;
509 const arch_register_class_t *cls = env->cls;
511 plist_element_t *element = NULL;
512 unsigned colors_n = arch_register_class_n_regs(cls);
513 be_pbqp_alloc_env_t pbqp_alloc_env;
518 ir_timer_t *t_ra_pbqp_alloc_create = ir_timer_new();
519 ir_timer_t *t_ra_pbqp_alloc_solve = ir_timer_new();
520 ir_timer_t *t_ra_pbqp_alloc_create_aff = ir_timer_new();
522 printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg)));
524 lv = be_assure_liveness(birg);
525 be_liveness_assure_sets(lv);
526 be_liveness_assure_chk(lv);
530 dom_tree_walk_irg(irg, insert_perms, NULL, env);
532 /* dump graph after inserting perms */
533 if (env->opts->dump_flags & BE_CH_DUMP_CONSTR) {
535 snprintf(buf, sizeof(buf), "-%s-constr", cls->name);
536 be_dump(irg, buf, dump_ir_block_graph_sched);
540 /* initialize pbqp allocation data structure */
541 pbqp_alloc_env.pbqp_inst = alloc_pbqp(get_irg_last_idx(irg)); /* initialize pbqp instance */
542 pbqp_alloc_env.birg = birg;
543 pbqp_alloc_env.cls = cls;
544 pbqp_alloc_env.irg = irg;
545 pbqp_alloc_env.lv = lv;
546 pbqp_alloc_env.ignored_regs = bitset_malloc(colors_n);
547 pbqp_alloc_env.rpeo = plist_new();
548 pbqp_alloc_env.restr_nodes = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
549 pbqp_alloc_env.ife_edge_num = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
550 pbqp_alloc_env.env = env;
551 be_put_ignore_regs(birg, cls, pbqp_alloc_env.ignored_regs); /* get ignored registers */
554 /* create costs matrix template for interference edges */
555 struct pbqp_matrix *ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
/* row and col advance together, so only the diagonal is written */
557 for (row = 0, col=0; row < colors_n; row++, col++)
558 pbqp_matrix_set(ife_matrix, row, col, INF_COSTS);
560 pbqp_alloc_env.ife_matrix_template = ife_matrix;
563 if (!use_exec_freq) {
564 /* create costs matrix template for affinity edges */
565 struct pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
567 for (row = 0; row < colors_n; row++) {
568 for (col = 0; col < colors_n; col++) {
570 pbqp_matrix_set(afe_matrix, row, col, 2);
573 pbqp_alloc_env.aff_matrix_template = afe_matrix;
577 /* create pbqp instance */
579 ir_timer_reset_and_start(t_ra_pbqp_alloc_create);
582 dom_tree_walk_irg(irg, create_pbqp_coloring_instance , NULL, &pbqp_alloc_env);
584 ir_timer_stop(t_ra_pbqp_alloc_create);
588 /* set up affinity edges */
590 ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff);
592 foreach_plist(pbqp_alloc_env.rpeo, element) {
593 pbqp_node *node = element->data;
594 ir_node *irn = get_idx_irn(irg, node->index);
596 create_affinity_edges(irn, &pbqp_alloc_env);
599 ir_timer_stop(t_ra_pbqp_alloc_create_aff);
604 // dump graph before solving pbqp
605 FILE *file_before = my_open(env, "", "-pbqp_coloring.html");
606 set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before);
609 /* print out reverse perfect elimination order */
611 plist_element_t *elements;
612 foreach_plist(pbqp_alloc_env.rpeo, elements) {
613 pbqp_node *node = elements->data;
614 printf(" %d(%lu);", node->index, get_idx_irn(irg, node->index)->node_nr);
620 /* solve pbqp instance */
622 ir_timer_reset_and_start(t_ra_pbqp_alloc_solve);
624 if(use_late_decision) {
625 solve_pbqp_heuristical_co_ld(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
628 solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
631 ir_timer_stop(t_ra_pbqp_alloc_solve);
633 num solution = get_solution(pbqp_alloc_env.pbqp_inst);
634 assert(solution != INF_COSTS && "No PBQP solution found");
/* the solution of a node is the index of its register within the class */
638 foreach_plist(pbqp_alloc_env.rpeo, element) {
639 pbqp_node *node = element->data;
640 ir_node *irn = get_idx_irn(irg, node->index);
641 num color = get_node_solution(pbqp_alloc_env.pbqp_inst, node->index);
642 const arch_register_t *reg = arch_register_for_index(cls, color);
644 arch_set_irn_register(irn, reg);
649 printf("%-20s: %8.3lf msec\n", "pbqp alloc create",
650 (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0);
651 printf("%-20s: %8.3lf msec\n", "pbqp alloc solve",
652 (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0);
653 printf("%-20s: %8.3lf msec\n", "pbqp alloc create aff",
654 (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0);
658 /* free reserved memory */
662 bitset_free(pbqp_alloc_env.ignored_regs);
663 free_pbqp(pbqp_alloc_env.pbqp_inst);
664 plist_free(pbqp_alloc_env.rpeo);
665 xfree(pbqp_alloc_env.restr_nodes);
666 xfree(pbqp_alloc_env.ife_edge_num);
671 * Initializes this module.
/* Module constructor: looks up the option group be.ra.chordal.coloring.pbqp,
 * adds the options[] table of this file to it and registers a coloring
 * named "pbqp" with the chordal allocator.
 * NOTE(review): the initializer of the static coloring descriptor is not
 * among the lines shown here - presumably it points at be_pbqp_coloring();
 * confirm against the full source. */
673 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pbqp_coloring);
674 void be_init_pbqp_coloring(void)
676 lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
677 lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
678 lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
679 lc_opt_entry_t *coloring_grp = lc_opt_get_grp(chordal_grp, "coloring");
680 lc_opt_entry_t *pbqp_grp = lc_opt_get_grp(coloring_grp, "pbqp");
/* static storage: the descriptor address is handed to the registry below */
682 static be_ra_chordal_coloring_t coloring = {
686 lc_opt_add_table(pbqp_grp, options);
687 be_register_chordal_coloring("pbqp", &coloring);