Allow the first line to be a comment.
[libfirm] / ir / be / bepbqpcoloring.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       PBQP based register allocation.
23  * @author      Thomas Bersch
24  * @date        27.11.2009
25  */
26
27 /* miscellaneous includes */
28 #include "config.h"
29
30 #include "debug.h"
31 #include "error.h"
32
33 #include "irdom.h"
34 #include "irdump.h"
35 #include "iredges_t.h"
36 #include "irprintf.h"
37 #include "irgwalk.h"
38 #include "irtools.h"
39 #include "time.h"
40 #include "execfreq_t.h"
41 #include "bipartite.h"
42
43 /* libfirm/ir/be includes */
44 #include "bearch.h"
45 #include "beirg.h"
46 #include "besched.h"
47 #include "bemodule.h"
48 #include "bechordal_common.h"
49 #include "bechordal.h"
50 #include "bechordal_t.h"
51 #include "beinsn_t.h"
52 #include "benode.h"
53 #include "belive.h"
54 #include "belive_t.h"
55 #include "beutil.h"
56 #include "plist.h"
57 #include "pqueue.h"
58 #include "becopyopt.h"
59
60 /* pbqp includes */
61 #include "kaps.h"
62 #include "matrix.h"
63 #include "vector.h"
64 #include "vector_t.h"
65 #include "heuristical_co.h"
66 #include "heuristical_co_ld.h"
67 #include "pbqp_t.h"
68 #include "html_dumper.h"
69 #include "pbqp_node_t.h"
70 #include "pbqp_node.h"
71 #include "pbqp_edge_t.h"
72
73 #define TIMER                 0
74 #define PRINT_RPEO            0
75 #define USE_BIPARTIT_MATCHING 0
76 #define DO_USEFUL_OPT         1
77
78
79 static int use_exec_freq     = true;
80 static int use_late_decision = false;
81
82 typedef struct be_pbqp_alloc_env_t {
83         pbqp_t                      *pbqp_inst;         /**< PBQP instance for register allocation */
84         ir_graph                    *irg;               /**< The graph under examination. */
85         const arch_register_class_t *cls;               /**< Current processed register class */
86         be_lv_t                     *lv;
87         bitset_t                    *allocatable_regs;
88         pbqp_matrix_t               *ife_matrix_template;
89         pbqp_matrix_t               *aff_matrix_template;
90         plist_t                     *rpeo;
91         unsigned                    *restr_nodes;
92         unsigned                    *ife_edge_num;
93         ir_execfreq_int_factors      execfreq_factors;
94         be_chordal_env_t            *env;
95 } be_pbqp_alloc_env_t;
96
97
98 #define is_Reg_Phi(irn)                                        (is_Phi(irn) && mode_is_data(get_irn_mode(irn)))
99 #define get_Perm_src(irn)                                      (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn)))
100 #define is_Perm_Proj(irn)                                      (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn)))
101 #define insert_edge(pbqp, src_node, trg_node, template_matrix) (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix)))
102 #define get_free_regs(restr_nodes, cls, irn)                   (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)])
103
104 static const lc_opt_table_entry_t options[] = {
105         LC_OPT_ENT_BOOL("exec_freq", "use exec_freq",  &use_exec_freq),
106         LC_OPT_ENT_BOOL("late_decision", "use late decision for register allocation",  &use_late_decision),
107         LC_OPT_LAST
108 };
109
110 #if KAPS_DUMP
111 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
112 {
113         FILE       *result;
114         char        buf[1024];
115         size_t      i;
116         size_t      n;
117         char       *tu_name;
118         const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
119
120         n = strlen(cup_name);
121         tu_name = XMALLOCN(char, n + 1);
122         strcpy(tu_name, cup_name);
123         for (i = 0; i < n; ++i)
124                 if (tu_name[i] == '.')
125                         tu_name[i] = '_';
126
127         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
128         xfree(tu_name);
129         result = fopen(buf, "wt");
130         if (result == NULL) {
131                 panic("Couldn't open '%s' for writing.", buf);
132         }
133
134         return result;
135 }
136 #endif
137
138
139 static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn)
140 {
141         const arch_register_class_t *cls = pbqp_alloc_env->cls;
142         pbqp_t   *pbqp_inst              = pbqp_alloc_env->pbqp_inst;
143         bitset_t *allocatable_regs       = pbqp_alloc_env->allocatable_regs;
144         unsigned  colors_n               = arch_register_class_n_regs(cls);
145         unsigned  cntConstrains          = 0;
146
147         /* create costs vector depending on register constrains */
148         vector_t *costs_vector = vector_alloc(pbqp_inst, colors_n);
149
150         /* set costs depending on register constrains */
151         unsigned idx;
152         for (idx = 0; idx < colors_n; idx++) {
153                 const arch_register_req_t *req = arch_get_irn_register_req(irn);
154                 const arch_register_t     *reg = arch_register_for_index(cls, idx);
155                 if (!bitset_is_set(allocatable_regs, idx)
156                     || !arch_reg_is_allocatable(req, reg)) {
157                         /* constrained */
158                         vector_set(costs_vector, idx, INF_COSTS);
159                         cntConstrains++;
160                 }
161         }
162
163         /* add vector to pbqp node */
164         add_node_costs(pbqp_inst, get_irn_idx(irn), costs_vector);
165         pbqp_alloc_env->restr_nodes[get_irn_idx(irn)] = cntConstrains;
166 }
167
168 static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node)
169 {
170         pbqp_t                      *pbqp                = pbqp_alloc_env->pbqp_inst;
171         const arch_register_class_t *cls                 = pbqp_alloc_env->cls;
172         pbqp_matrix_t               *ife_matrix_template = pbqp_alloc_env->ife_matrix_template;
173         unsigned                    *restr_nodes         = pbqp_alloc_env->restr_nodes;
174
175         if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
176
177                 /* increase ife edge counter */
178                 pbqp_alloc_env->ife_edge_num[get_irn_idx(src_node)]++;
179                 pbqp_alloc_env->ife_edge_num[get_irn_idx(trg_node)]++;
180
181 #if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING
182                 /* do useful optimization to speed up pbqp solving (we can do this because we know our matrix) */
183                 if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
184                         assert(vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs) !=
185                                vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs) &&
186                                "Interfering nodes must not have the same register!");
187                         return;
188                 }
189                 if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
190                         if (get_free_regs(restr_nodes, cls, src_node) == 1) {
191                                 unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
192                                 vector_set(get_node(pbqp, get_irn_idx(trg_node))->costs, idx, INF_COSTS);
193                         }
194                         else {
195                                 unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
196                                 vector_set(get_node(pbqp, get_irn_idx(src_node))->costs, idx, INF_COSTS);
197                         }
198                         return;
199                 }
200 #endif
201                 /* insert interference edge */
202                 insert_edge(pbqp, src_node, trg_node, ife_matrix_template);
203         }
204 }
205
206 static void insert_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos)
207 {
208         pbqp_t                      *pbqp        = pbqp_alloc_env->pbqp_inst;
209         const arch_register_class_t *cls         = pbqp_alloc_env->cls;
210         unsigned                    *restr_nodes = pbqp_alloc_env->restr_nodes;
211         pbqp_matrix_t               *afe_matrix  = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls));
212         unsigned                     colors_n    = arch_register_class_n_regs(cls);
213
214         if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
215                 if (use_exec_freq) {
216                         /* get exec_freq for copy_block */
217                         ir_node *root_bl = get_nodes_block(src_node);
218                         ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
219                         int      res     = get_block_execfreq_int(&pbqp_alloc_env->execfreq_factors, copy_bl);
220
221                         /* create afe-matrix */
222                         unsigned row, col;
223                         for (row = 0; row < colors_n; row++) {
224                                 for (col = 0; col < colors_n; col++) {
225                                         if (row != col)
226                                                 pbqp_matrix_set(afe_matrix, row, col, (num)res);
227                                 }
228                         }
229                 }
230                 else {
231                         afe_matrix = pbqp_alloc_env->aff_matrix_template;
232                 }
233 #if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING
234                 /* do useful optimization to speed up pbqp solving */
235                 if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
236                         return;
237                 }
238                 if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
239                         if (get_free_regs(restr_nodes, cls, src_node) == 1) {
240                                 unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
241                                 vector_add_matrix_col(get_node(pbqp, get_irn_idx(trg_node))->costs, afe_matrix, regIdx);
242                         }
243                         else {
244                                 unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
245                                 vector_add_matrix_col(get_node(pbqp, get_irn_idx(src_node))->costs, afe_matrix, regIdx);
246                         }
247                         return;
248                 }
249 #endif
250                 /* insert interference edge */
251                 insert_edge(pbqp, src_node, trg_node, afe_matrix);
252         }
253 }
254
255 static void create_affinity_edges(ir_node *irn, void *env)
256 {
257         be_pbqp_alloc_env_t         *pbqp_alloc_env = (be_pbqp_alloc_env_t*)env;
258         const arch_register_class_t *cls            = pbqp_alloc_env->cls;
259         const arch_register_req_t   *req            = arch_get_irn_register_req(irn);
260         unsigned                     pos;
261         unsigned                     max;
262
263         if (is_Reg_Phi(irn)) { /* Phis */
264                 for (pos = 0, max = get_irn_arity(irn); pos < max; ++pos) {
265                         ir_node *arg = get_irn_n(irn, pos);
266
267                         if (!arch_irn_consider_in_reg_alloc(cls, arg))
268                                 continue;
269
270                         /* no edges to itself */
271                         if (irn == arg) {
272                                 continue;
273                         }
274
275                         insert_afe_edge(pbqp_alloc_env, irn, arg, pos);
276                 }
277         }
278         else if (is_Perm_Proj(irn)) { /* Perms */
279                 ir_node *arg = get_Perm_src(irn);
280                 if (!arch_irn_consider_in_reg_alloc(cls, arg))
281                         return;
282
283                 insert_afe_edge(pbqp_alloc_env, irn, arg, -1);
284         } else if (arch_register_req_is(req, should_be_same)) {
285                 const unsigned other = req->other_same;
286                 int            i;
287
288                 for (i = 0; 1U << i <= other; ++i) {
289                         if (other & (1U << i)) {
290                                 ir_node *other = get_irn_n(skip_Proj(irn), i);
291                                 if (!arch_irn_consider_in_reg_alloc(cls, other))
292                                         continue;
293
294                                 /* no edges to itself */
295                                 if (irn == other) {
296                                         continue;
297                                 }
298
299                                 insert_afe_edge(pbqp_alloc_env, irn, other, i);
300                         }
301                 }
302         }
303 }
304
305 static void create_pbqp_coloring_instance(ir_node *block, void *data)
306 {
307         be_pbqp_alloc_env_t         *pbqp_alloc_env     = (be_pbqp_alloc_env_t*)data;
308         be_lv_t                     *lv                 = pbqp_alloc_env->lv;
309         const arch_register_class_t *cls                = pbqp_alloc_env->cls;
310         plist_t                     *rpeo               = pbqp_alloc_env->rpeo;
311         pbqp_t                      *pbqp_inst          = pbqp_alloc_env->pbqp_inst;
312         plist_t                     *temp_list          = plist_new();
313         plist_element_t             *el;
314         ir_nodeset_t                 live_nodes;
315 #if USE_BIPARTIT_MATCHING
316         int                         *assignment         = ALLOCAN(int, cls->n_regs);
317 #else
318         unsigned                    *restr_nodes        = pbqp_alloc_env->restr_nodes;
319         pqueue_t                    *restr_nodes_queue  = new_pqueue();
320         pqueue_t                    *queue              = new_pqueue();
321         plist_t                     *sorted_list        = plist_new();
322         ir_node                     *last_element       = NULL;
323 #endif
324
325         /* first, determine the pressure */
326         /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */
327         create_borders(block, pbqp_alloc_env->env);
328
329         /* calculate living nodes for the first step */
330         ir_nodeset_init(&live_nodes);
331         be_liveness_end_of_block(lv, cls, block, &live_nodes);
332
333         /* create pbqp nodes, interference edges and reverse perfect elimination order */
334         sched_foreach_reverse(block, irn) {
335                 if (get_irn_mode(irn) == mode_T) {
336                         foreach_out_edge(irn, edge) {
337                                 ir_node *proj = get_edge_src_irn(edge);
338                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
339                                         continue;
340
341                                 /* create pbqp source node if it dosn't exist */
342                                 if (get_node(pbqp_inst, get_irn_idx(proj)) == NULL) {
343                                         create_pbqp_node(pbqp_alloc_env, proj);
344                                 }
345
346                                 /* create nodes and interference edges */
347                                 foreach_ir_nodeset(&live_nodes, live, iter) {
348                                         /* create pbqp source node if it dosn't exist */
349                                         if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
350                                                 create_pbqp_node(pbqp_alloc_env, live);
351                                         }
352
353                                         /* no edges to itself */
354                                         if (proj == live) {
355                                                 continue;
356                                         }
357
358                                         insert_ife_edge(pbqp_alloc_env, proj, live);
359                                 }
360                         }
361                 }
362                 else {
363                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
364                                 /* create pbqp source node if it dosn't exist */
365                                 if (get_node(pbqp_inst, get_irn_idx(irn)) == NULL) {
366                                         create_pbqp_node(pbqp_alloc_env, irn);
367                                 }
368
369                                 /* create nodes and interference edges */
370                                 foreach_ir_nodeset(&live_nodes, live, iter) {
371                                         /* create pbqp source node if it dosn't exist */
372                                         if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
373                                                 create_pbqp_node(pbqp_alloc_env, live);
374                                         }
375
376                                         /* no edges to itself */
377                                         if (irn == live) {
378                                                 continue;
379                                         }
380
381                                         /* insert interference edge */
382                                         insert_ife_edge(pbqp_alloc_env, irn, live);
383                                 }
384                         }
385                 }
386
387                 /* get living nodes for next step */
388                 if (!is_Phi(irn)) {
389                         be_liveness_transfer(cls, irn, &live_nodes);
390                 }
391
392 #if USE_BIPARTIT_MATCHING
393                 if (get_irn_mode(irn) == mode_T) {
394                         unsigned     clique_size         = 0;
395                         unsigned     n_alloc             = 0;
396                         pbqp_node   *clique[cls->n_regs];
397                         bipartite_t *bp                  = bipartite_new(cls->n_regs, cls->n_regs);
398
399                         /* add all proj after a perm to clique */
400                         foreach_out_edge(irn, edge) {
401                                 ir_node *proj = get_edge_src_irn(edge);
402
403                                 /* ignore node if it is not necessary for register allocation */
404                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
405                                         continue;
406
407                                 /* insert pbqp node into temp rpeo list of this block */
408                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(proj)));
409
410                                 if(is_Perm_Proj(proj)) {
411                                         /* add proj to clique */
412                                         pbqp_node *clique_member = get_node(pbqp_inst,proj->node_idx);
413                                         vector    *costs         = clique_member->costs;
414                                         unsigned   idx           = 0;
415
416                                         clique[clique_size] = clique_member;
417
418                                         for(idx = 0; idx < costs->len; idx++) {
419                                                 if(costs->entries[idx].data != INF_COSTS) {
420                                                         bipartite_add(bp, clique_size, idx);
421                                                 }
422                                         }
423
424                                         /* increase node counter */
425                                         clique_size++;
426                                         n_alloc++;
427                                 }
428                         }
429
430                         if(clique_size > 0) {
431                                 plist_element_t *listElement;
432                                 foreach_plist(temp_list, listElement) {
433                                         pbqp_node *clique_candidate  = listElement->data;
434                                         unsigned   idx               = 0;
435                                         bool       isMember          = true;
436
437                                         /* clique size not bigger then register class size */
438                                         if(clique_size >= cls->n_regs) break;
439
440                                         for(idx = 0; idx < clique_size; idx++) {
441                                                 pbqp_node *member = clique[idx];
442
443                                                 if(member == clique_candidate) {
444                                                         isMember = false;
445                                                         break;
446                                                 }
447
448                                                 if(get_edge(pbqp_inst, member->index, clique_candidate->index) == NULL && get_edge(pbqp_inst, clique_candidate->index, member->index) == NULL) {
449                                                         isMember = false;
450                                                         break;
451                                                 }
452                                         }
453
454                                         /* goto next list element if current node is not a member of the clique */
455                                         if(!isMember) { continue; }
456
457                                         /* add candidate to clique */
458                                         clique[clique_size] = clique_candidate;
459
460                                         vector *costs = clique_candidate->costs;
461                                         for(idx = 0; idx < costs->len; idx++) {
462                                                 if(costs->entries[idx].data != INF_COSTS) {
463                                                         bipartite_add(bp, clique_size, idx);
464                                                 }
465                                         }
466
467                                         /* increase node counter */
468                                         clique_size++;
469                                 }
470                         }
471
472                         /* solve bipartite matching */
473                         bipartite_matching(bp, assignment);
474
475                         /* assign colors */
476                         unsigned nodeIdx = 0;
477                         for(nodeIdx = 0; nodeIdx < clique_size; nodeIdx++) {
478                                 vector *costs = clique[nodeIdx]->costs;
479                                 int     idx;
480                                 for(idx = 0; idx < (int)costs->len; idx++) {
481                                         if(assignment[nodeIdx] != idx) {
482                                                 costs->entries[idx].data = INF_COSTS;
483                                         }
484                                 }
485                                 assert(assignment[nodeIdx] >= 0 && "there must have been a register assigned (node not register pressure faithful?)");
486                         }
487
488                         /* free memory */
489                         bipartite_free(bp);
490                 }
491                 else {
492                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
493                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
494                         }
495                 }
496 #else
497                 /* order nodes for perfect elimination order */
498                 if (get_irn_mode(irn) == mode_T) {
499                         bool allHaveIFEdges = true;
500                         foreach_out_edge(irn, edge) {
501                                 ir_node *proj = get_edge_src_irn(edge);
502                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
503                                         continue;
504
505                                 /* insert proj node into priority queue (descending by the number of interference edges) */
506                                 if (get_free_regs(restr_nodes, cls, proj) <= 4) {
507                                         pqueue_put(restr_nodes_queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
508                                 }
509                                 else {
510                                         pqueue_put(queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
511                                 }
512
513                                 /* skip last step if there is no last_element */
514                                 if(last_element == NULL)
515                                         continue;
516
517                                 /* check if proj has an if edge to last_element (at this time pbqp contains only if edges) */
518                                 if(get_edge(pbqp_inst, proj->node_idx, last_element->node_idx) == NULL && get_edge(pbqp_inst, last_element->node_idx, proj->node_idx) == NULL) {
519                                         allHaveIFEdges = false; /* there is no if edge between proj and last_element */
520                                 }
521                         }
522
523                         if(last_element != NULL && allHaveIFEdges) {
524                                 if (get_free_regs(restr_nodes, cls, last_element) <= 4) {
525                                         pqueue_put(restr_nodes_queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
526                                 }
527                                 else {
528                                         pqueue_put(queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
529                                 }
530                                 plist_erase(temp_list, plist_find_value(temp_list, get_node(pbqp_inst, last_element->node_idx)));
531                                 last_element = NULL;
532                         }
533
534                         /* first insert all restricted proj nodes */
535                         while (!pqueue_empty(restr_nodes_queue)) {
536                                 ir_node *node = (ir_node*)pqueue_pop_front(restr_nodes_queue);
537                                 plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node)));
538                         }
539
540                         /* insert proj nodes descending by their number of interference edges */
541                         while (!pqueue_empty(queue)) {
542                                 ir_node *node = (ir_node*)pqueue_pop_front(queue);
543                                 plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node)));
544                         }
545
546                         /* invert sorted list */
547                         foreach_plist(sorted_list, el) {
548                                 plist_insert_front(temp_list, el->data);
549                         }
550
551                         plist_clear(sorted_list);
552
553                 }
554                 else {
555                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
556                                 // remember last colorable node
557                                 last_element = irn;
558                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
559                         }
560                         else {
561                                 // node not colorable, so ignore it
562                                 last_element = NULL;
563                         }
564                 }
565 #endif
566         }
567
568         /* add the temp rpeo list of this block to the global reverse perfect elimination order list*/
569         foreach_plist(temp_list, el) {
570                 plist_insert_back(rpeo, el->data);
571         }
572
573         /* free reserved memory */
574         ir_nodeset_destroy(&live_nodes);
575         plist_free(temp_list);
576 #if USE_BIPARTIT_MATCHING
577 #else
578         plist_free(sorted_list);
579         del_pqueue(queue);
580         del_pqueue(restr_nodes_queue);
581 #endif
582 }
583
584 static void insert_perms(ir_node *block, void *data)
585 {
586         be_chordal_env_t *env    = (be_chordal_env_t*)data;
587         ir_node          *irn;
588
589         for (irn = sched_first(block); !sched_is_end(irn);) {
590                 ir_node   *const next = sched_next(irn);
591                 be_insn_t *      insn = be_scan_insn(env, irn);
592                 if (insn)
593                         pre_process_constraints(env, &insn);
594
595                 irn = next;
596         }
597 }
598
599 static void be_pbqp_coloring(be_chordal_env_t *env)
600 {
601         ir_graph                    *irg            = env->irg;
602         const arch_register_class_t *cls            = env->cls;
603         be_lv_t                     *lv             = NULL;
604         plist_element_t             *element        = NULL;
605         unsigned                     colors_n       = arch_register_class_n_regs(cls);
606         be_pbqp_alloc_env_t          pbqp_alloc_env;
607         unsigned                     col;
608         unsigned                     row;
609         pbqp_matrix_t               *ife_matrix;
610         num                          solution;
611 #if KAPS_DUMP
612         FILE                        *file_before;
613 #endif
614 #if TIMER
615         ir_timer_t *t_ra_pbqp_alloc_create     = ir_timer_new();
616         ir_timer_t *t_ra_pbqp_alloc_solve      = ir_timer_new();
617         ir_timer_t *t_ra_pbqp_alloc_create_aff = ir_timer_new();
618
619         printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg)));
620 #endif
621         be_assure_live_sets(irg);
622         lv = be_get_irg_liveness(irg);
623
624         /* insert perms */
625         assure_doms(irg);
626         dom_tree_walk_irg(irg, insert_perms, NULL, env);
627
628         /* dump graph after inserting perms */
629         if (env->opts->dump_flags & BE_CH_DUMP_CONSTR) {
630                 char buf[256];
631                 snprintf(buf, sizeof(buf), "-%s-constr", cls->name);
632                 dump_ir_graph(irg, buf);
633         }
634
635         ir_calculate_execfreq_int_factors(&pbqp_alloc_env.execfreq_factors, irg);
636
637         /* initialize pbqp allocation data structure */
638         pbqp_alloc_env.pbqp_inst        = alloc_pbqp(get_irg_last_idx(irg));  /* initialize pbqp instance */
639         pbqp_alloc_env.cls              = cls;
640         pbqp_alloc_env.irg              = irg;
641         pbqp_alloc_env.lv               = lv;
642         pbqp_alloc_env.allocatable_regs = bitset_malloc(colors_n);
643         pbqp_alloc_env.rpeo             = plist_new();
644         pbqp_alloc_env.restr_nodes      = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
645         pbqp_alloc_env.ife_edge_num     = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
646         pbqp_alloc_env.env              = env;
647         be_put_allocatable_regs(irg, cls, pbqp_alloc_env.allocatable_regs);
648
649
650         /* create costs matrix template for interference edges */
651         ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
652         /* set costs */
653         for (row = 0, col = 0; row < colors_n; row++, col++)
654                 pbqp_matrix_set(ife_matrix, row, col, INF_COSTS);
655
656         pbqp_alloc_env.ife_matrix_template = ife_matrix;
657
658
659         if (!use_exec_freq) {
660                 /* create costs matrix template for affinity edges */
661                 pbqp_matrix_t *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
662                 /* set costs */
663                 for (row = 0; row < colors_n; row++) {
664                         for (col = 0; col < colors_n; col++) {
665                                 if (row != col)
666                                         pbqp_matrix_set(afe_matrix, row, col, 2);
667                         }
668                 }
669                 pbqp_alloc_env.aff_matrix_template = afe_matrix;
670         }
671
672
673         /* create pbqp instance */
674 #if TIMER
675         ir_timer_reset_and_start(t_ra_pbqp_alloc_create);
676 #endif
677         assure_doms(irg);
678         dom_tree_walk_irg(irg, create_pbqp_coloring_instance , NULL, &pbqp_alloc_env);
679 #if TIMER
680         ir_timer_stop(t_ra_pbqp_alloc_create);
681 #endif
682
683
684         /* set up affinity edges */
685 #if TIMER
686         ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff);
687 #endif
688         foreach_plist(pbqp_alloc_env.rpeo, element) {
689                 pbqp_node_t *node = (pbqp_node_t*)element->data;
690                 ir_node     *irn  = get_idx_irn(irg, node->index);
691
692                 create_affinity_edges(irn, &pbqp_alloc_env);
693         }
694 #if TIMER
695         ir_timer_stop(t_ra_pbqp_alloc_create_aff);
696 #endif
697
698
699 #if KAPS_DUMP
700         // dump graph before solving pbqp
701         file_before = my_open(env, "", "-pbqp_coloring.html");
702         set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before);
703 #endif
704
705         /* print out reverse perfect elimination order */
706 #if PRINT_RPEO
707         {
708                 plist_element_t *elements;
709                 foreach_plist(pbqp_alloc_env.rpeo, elements) {
710                         pbqp_node_t *node = elements->data;
711                         printf(" %d(%ld);", node->index, get_idx_irn(irg, node->index)->node_nr);
712                 }
713                 printf("\n");
714         }
715 #endif
716
717         /* solve pbqp instance */
718 #if TIMER
719         ir_timer_reset_and_start(t_ra_pbqp_alloc_solve);
720 #endif
721         if(use_late_decision) {
722                 solve_pbqp_heuristical_co_ld(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
723         }
724         else {
725                 solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
726         }
727 #if TIMER
728         ir_timer_stop(t_ra_pbqp_alloc_solve);
729 #endif
730
731
732         solution = get_solution(pbqp_alloc_env.pbqp_inst);
733         if (solution == INF_COSTS)
734                 panic("No PBQP solution found");
735
736
737         /* assign colors */
738         foreach_plist(pbqp_alloc_env.rpeo, element) {
739                 pbqp_node_t           *node  = (pbqp_node_t*)element->data;
740                 ir_node               *irn   = get_idx_irn(irg, node->index);
741                 num                    color = get_node_solution(pbqp_alloc_env.pbqp_inst, node->index);
742                 const arch_register_t *reg   = arch_register_for_index(cls, color);
743
744                 arch_set_irn_register(irn, reg);
745         }
746
747
748 #if TIMER
749         printf("PBQP alloc create:     %10.3lf msec\n",
750                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0);
751         printf("PBQP alloc solve:      %10.3lf msec\n",
752                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0);
753         printf("PBQP alloc create aff: %10.3lf msec\n",
754                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0);
755 #endif
756
757
758         /* free reserved memory */
759 #if KAPS_DUMP
760         fclose(file_before);
761 #endif
762         bitset_free(pbqp_alloc_env.allocatable_regs);
763         free_pbqp(pbqp_alloc_env.pbqp_inst);
764         plist_free(pbqp_alloc_env.rpeo);
765         xfree(pbqp_alloc_env.restr_nodes);
766         xfree(pbqp_alloc_env.ife_edge_num);
767 }
768
769
770 /**
771  * Initializes this module.
772  */
773 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pbqp_coloring)
774 void be_init_pbqp_coloring(void)
775 {
776         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
777         lc_opt_entry_t *ra_grp       = lc_opt_get_grp(be_grp, "ra");
778         lc_opt_entry_t *chordal_grp  = lc_opt_get_grp(ra_grp, "chordal");
779         lc_opt_entry_t *coloring_grp = lc_opt_get_grp(chordal_grp, "coloring");
780         lc_opt_entry_t *pbqp_grp     = lc_opt_get_grp(coloring_grp, "pbqp");
781
782         static be_ra_chordal_coloring_t coloring = {
783                 be_pbqp_coloring
784         };
785
786         lc_opt_add_table(pbqp_grp, options);
787         be_register_chordal_coloring("pbqp", &coloring);
788 }