Now bipartite matching can be used for pre-coloring restricted cliques after a perm.
[libfirm] / ir / be / bepbqpcoloring.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       PBQP based register allocation.
23  * @author      Thomas Bersch
24  * @date        27.11.2009
25  * @version     $Id: bechordal.c 26750 2009-11-27 09:37:43Z bersch $
26  */
27
28 /*      miscellaneous includes */
29 #include "config.h"
30
31 #ifdef FIRM_KAPS
32
33 #include "debug.h"
34 #include "error.h"
35
36 #include "irdom.h"
37 #include "irdump.h"
38 #include "iredges_t.h"
39 #include "irprintf.h"
40 #include "irgwalk.h"
41 #include "time.h"
42
43 /* libfirm/ir/adt includes */
44 #include "bipartite.h"
45
46 /* libfirm/ir/be includes */
47 #include "bearch.h"
48 #include "beirg.h"
49 #include "besched.h"
50 #include "bemodule.h"
51 #include "bechordal_common.h"
52 #include "bechordal.h"
53 #include "bechordal_t.h"
54 #include "beinsn_t.h"
55 #include "benode.h"
56 #include "belive.h"
57 #include "belive_t.h"
58 #include "beutil.h"
59 #include "plist.h"
60 #include "pqueue.h"
61 #include "becopyopt.h"
62
63 /* pbqp includes */
64 #include "kaps.h"
65 #include "matrix.h"
66 #include "vector.h"
67 #include "vector_t.h"
68 #include "heuristical_co.h"
69 #include "heuristical_co_ld.h"
70 #include "pbqp_t.h"
71 #include "html_dumper.h"
72 #include "pbqp_node_t.h"
73 #include "pbqp_node.h"
74 #include "pbqp_edge_t.h"
75
/* Compile-time switches of the PBQP register allocator. */
76 #define TIMER                                   0 /* non-zero: be_pbqp_coloring creates timers and prints a header line */
77 #define PRINT_RPEO                              0 /* NOTE(review): not referenced in the visible part of the file; presumably dumps the elimination order */
78 #define USE_BIPARTIT_MATCHING   0 /* non-zero: pre-color cliques behind a Perm by bipartite matching in create_pbqp_coloring_instance */
79 #define DO_USEFUL_OPT                   1 /* non-zero: edges touching a node with exactly one free register are folded into cost vectors */
80
81
/* Run-time options; both are registered in the options[] table below. */
82 static int use_exec_freq                = true; /* weight affinity edges with the execution frequency of the copy block */
83 static int use_late_decision    = false; /* NOTE(review): only registered as an option here; its consumer is outside the visible code */
84
/** Environment shared by all walkers that build the PBQP coloring instance. */
85 typedef struct _be_pbqp_alloc_env_t {
86         pbqp                                            *pbqp_inst;                     /**< PBQP instance for register allocation */
87         ir_graph                        *irg;                   /**< The graph under examination. */
88         const arch_register_class_t *cls;                               /**< Current processed register class */
89         be_lv_t                     *lv; /**< liveness information used to compute the live set at the end of each block */
90         bitset_t                    *ignored_regs; /**< register indices that get INF_COSTS in every node's cost vector */
91         pbqp_matrix                                     *ife_matrix_template; /**< matrix copied for every interference edge */
92         pbqp_matrix                                     *aff_matrix_template; /**< affinity matrix used when use_exec_freq is off */
93         plist_t                                         *rpeo; /**< global node order; each block appends its own list (reverse perfect elimination order) */
94         unsigned                                        *restr_nodes; /**< per node index: number of registers forbidden when the PBQP node was created */
95         unsigned                                        *ife_edge_num; /**< per node index: number of interference edges counted so far */
96         be_chordal_env_t                        *env; /**< chordal environment, passed on to create_borders() */
97 } be_pbqp_alloc_env_t;
98
99
/*
 * Small helper macros.
 *  - is_Reg_Phi:    true for Phi nodes whose mode is a data mode.
 *  - get_Perm_src:  for a Proj, the operand of its predecessor at the Proj's number.
 *  - is_Perm_Proj:  true for a Proj whose predecessor is a be Perm.
 *  - insert_edge:   adds an edge between the PBQP nodes of both ir nodes, using a copy of template_matrix.
 *  - get_free_regs: number of registers of cls minus the restricted-register count stored for irn.
 * Note: get_Perm_src and is_Perm_Proj/insert_edge evaluate some arguments more than once.
 */
100 #define is_Reg_Phi(irn)                                                                                 (is_Phi(irn) && mode_is_data(get_irn_mode(irn)))
101 #define get_Perm_src(irn)                                                                               (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn)))
102 #define is_Perm_Proj(irn)                                                                               (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn)))
103 #define insert_edge(pbqp, src_node, trg_node, template_matrix)  (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix)))
104 #define get_free_regs(restr_nodes, cls, irn)                                    (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)])
105
106 static inline int is_2addr_code(const arch_register_req_t *req)
107 {
108         return (req->type & arch_register_req_type_should_be_same) != 0;
109 }
110
/* Option table: binds the option names to use_exec_freq and use_late_decision. */
111 static const lc_opt_table_entry_t options[] = {
112         LC_OPT_ENT_BOOL      ("exec_freq", "use exec_freq",  &use_exec_freq),
113         LC_OPT_ENT_BOOL      ("late_decision", "use late decision for register allocation",  &use_late_decision),
114         LC_OPT_LAST
115 };
116
117 #if KAPS_DUMP
118 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
119 {
120         FILE *result;
121         char buf[1024];
122         size_t i, n;
123         char *tu_name;
124         const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
125
126         n = strlen(cup_name);
127         tu_name = XMALLOCN(char, n + 1);
128         strcpy(tu_name, cup_name);
129         for (i = 0; i < n; ++i)
130                 if (tu_name[i] == '.')
131                         tu_name[i] = '_';
132
133         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
134         xfree(tu_name);
135         result = fopen(buf, "wt");
136         if (result == NULL) {
137                 panic("Couldn't open '%s' for writing.", buf);
138         }
139
140         return result;
141 }
142 #endif
143
144
145 static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn)
146 {
147         const arch_register_class_t *cls = pbqp_alloc_env->cls;
148         pbqp     *pbqp_inst              = pbqp_alloc_env->pbqp_inst;
149         bitset_t *ignored_regs           = pbqp_alloc_env->ignored_regs;
150         unsigned  colors_n               = arch_register_class_n_regs(cls);
151         unsigned  cntConstrains          = 0;
152
153         /* create costs vector depending on register constrains */
154         struct vector *costs_vector = vector_alloc(pbqp_inst, colors_n);
155
156         /* set costs depending on register constrains */
157         unsigned idx;
158         for (idx = 0; idx < colors_n; idx++) {
159                 if (bitset_is_set(ignored_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) {
160                         /* constrained */
161                         vector_set(costs_vector, idx, INF_COSTS);
162                         cntConstrains++;
163                 }
164         }
165
166         /* add vector to pbqp node */
167         add_node_costs(pbqp_inst, get_irn_idx(irn), costs_vector);
168         pbqp_alloc_env->restr_nodes[get_irn_idx(irn)] = cntConstrains;
169 }
170
171 static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node)
172 {
173         pbqp                                            *pbqp                = pbqp_alloc_env->pbqp_inst;
174         const arch_register_class_t *cls                 = pbqp_alloc_env->cls;
175         pbqp_matrix                             *ife_matrix_template = pbqp_alloc_env->ife_matrix_template;
176         unsigned                                        *restr_nodes         = pbqp_alloc_env->restr_nodes;
177
178         if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
179
180 //              /* increase ife edge counter */
181                 pbqp_alloc_env->ife_edge_num[get_irn_idx(src_node)]++;
182                 pbqp_alloc_env->ife_edge_num[get_irn_idx(trg_node)]++;
183
184 #if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING
185                 /* do useful optimization to speed up pbqp solving (we can do this because we know our matrix) */
186                 if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
187                         unsigned src_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
188                         unsigned trg_idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
189                         assert(src_idx != trg_idx && "Interfering nodes could not have the same register!");
190                         return;
191                 }
192                 if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
193                         if (get_free_regs(restr_nodes, cls, src_node) == 1) {
194                                 unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
195                                 vector_set(get_node(pbqp, get_irn_idx(trg_node))->costs, idx, INF_COSTS);
196                         }
197                         else {
198                                 unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
199                                 vector_set(get_node(pbqp, get_irn_idx(src_node))->costs, idx, INF_COSTS);
200                         }
201                         return;
202                 }
203 #endif
204                 /* insert interference edge */
205                 insert_edge(pbqp, src_node, trg_node, ife_matrix_template);
206         }
207 }
208
209 static void inser_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos)
210 {
211         pbqp                                            *pbqp             = pbqp_alloc_env->pbqp_inst;
212         const arch_register_class_t *cls              = pbqp_alloc_env->cls;
213         unsigned                                        *restr_nodes      = pbqp_alloc_env->restr_nodes;
214         pbqp_matrix                                     *afe_matrix       = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls));
215         unsigned                                         colors_n                 = arch_register_class_n_regs(cls);
216
217         if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
218                 if (use_exec_freq) {
219                         /* get exec_freq for copy_block */
220                         ir_node *root_bl = get_nodes_block(src_node);
221                         ir_node *copy_bl = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
222                         ir_exec_freq *exec_freq = be_get_irg_exec_freq(pbqp_alloc_env->irg);
223                         unsigned long res = get_block_execfreq_ulong(exec_freq, copy_bl);
224
225                         /* create afe-matrix */
226                         unsigned row, col;
227                         for (row = 0; row < colors_n; row++) {
228                                 for (col = 0; col < colors_n; col++) {
229                                         if (row != col)
230                                                 pbqp_matrix_set(afe_matrix, row, col, (num)res);
231                                 }
232                         }
233                 }
234                 else {
235                         afe_matrix = pbqp_alloc_env->aff_matrix_template;
236                 }
237 #if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING
238                 /* do useful optimization to speed up pbqp solving */
239                 if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
240                         return;
241                 }
242                 if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
243                         if (get_free_regs(restr_nodes, cls, src_node) == 1) {
244                                 unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
245                                 vector_add_matrix_col(get_node(pbqp, get_irn_idx(trg_node))->costs, afe_matrix, regIdx);
246                         }
247                         else {
248                                 unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
249                                 vector_add_matrix_col(get_node(pbqp, get_irn_idx(src_node))->costs, afe_matrix, regIdx);
250                         }
251                         return;
252                 }
253 #endif
254                 /* insert interference edge */
255                 insert_edge(pbqp, src_node, trg_node, afe_matrix);
256         }
257 }
258
259 static void create_affinity_edges(ir_node *irn, void *env)
260 {
261         be_pbqp_alloc_env_t         *pbqp_alloc_env   = env;
262         const arch_register_class_t *cls              = pbqp_alloc_env->cls;
263         const arch_register_req_t   *req              = arch_get_register_req_out(irn);
264         unsigned pos, max;
265
266         if (is_Reg_Phi(irn)) { /* Phis */
267                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
268                         ir_node *arg = get_irn_n(irn, pos);
269
270                         if (!arch_irn_consider_in_reg_alloc(cls, arg))
271                                 continue;
272
273                         /* no edges to itself */
274                         if (irn == arg) {
275                                 continue;
276                         }
277
278                         inser_afe_edge(pbqp_alloc_env, irn, arg, pos);
279                 }
280         }
281         else if (is_Perm_Proj(irn)) { /* Perms */
282                 ir_node *arg = get_Perm_src(irn);
283                 if (!arch_irn_consider_in_reg_alloc(cls, arg))
284                         return;
285
286                 inser_afe_edge(pbqp_alloc_env, irn, arg, -1);
287         }
288         else { /* 2-address code */
289                 if (is_2addr_code(req)) {
290                         const unsigned other = req->other_same;
291                         int i;
292
293                         for (i = 0; 1U << i <= other; ++i) {
294                                 if (other & (1U << i)) {
295                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
296                                         if (!arch_irn_consider_in_reg_alloc(cls, other))
297                                                 continue;
298
299                                         /* no edges to itself */
300                                         if (irn == other) {
301                                                 continue;
302                                         }
303
304                                         inser_afe_edge(pbqp_alloc_env, irn, other, i);
305                                 }
306                         }
307                 }
308         }
309 }
310
311 static void create_pbqp_coloring_instance(ir_node *block, void *data)
312 {
313         be_pbqp_alloc_env_t         *pbqp_alloc_env     = data;
314         be_lv_t                     *lv                 = pbqp_alloc_env->lv;
315         const arch_register_class_t *cls                = pbqp_alloc_env->cls;
316         plist_t                                         *rpeo                           = pbqp_alloc_env->rpeo;
317         pbqp                                            *pbqp_inst                      = pbqp_alloc_env->pbqp_inst;
318         plist_t                                         *temp_list              = plist_new();
319         plist_element_t                         *el;
320         ir_node                     *irn;
321         ir_nodeset_t                 live_nodes;
322 #if USE_BIPARTIT_MATCHING
323         int                                             *assignment                     = ALLOCAN(int, cls->n_regs);
324 //      ir_graph                                        *irg                            = pbqp_alloc_env->irg;
325 #else
326         unsigned                                        *restr_nodes            = pbqp_alloc_env->restr_nodes;
327         pqueue_t                                        *restr_nodes_queue      = new_pqueue();
328         pqueue_t                                        *queue                  = new_pqueue();
329         plist_t                                         *sorted_list            = plist_new();
330         ir_node                                         *last_element           = NULL;
331 #endif
332
333         /* first, determine the pressure */
334         /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */
335         create_borders(block, pbqp_alloc_env->env);
336
337         /* calculate living nodes for the first step */
338         ir_nodeset_init(&live_nodes);
339         be_liveness_end_of_block(lv, cls, block, &live_nodes);
340
341         /* create pbqp nodes, interference edges and reverse perfect elimination order */
342         sched_foreach_reverse(block, irn) {
343                 ir_node *live;
344                 ir_nodeset_iterator_t iter;
345
346                 if (get_irn_mode(irn) == mode_T) {
347                         const ir_edge_t *edge;
348                         foreach_out_edge(irn, edge) {
349                                 ir_node *proj = get_edge_src_irn(edge);
350                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
351                                         continue;
352
353                                 /* create pbqp source node if it dosn't exist */
354                                 if (get_node(pbqp_inst, get_irn_idx(proj)) == NULL) {
355                                         create_pbqp_node(pbqp_alloc_env, proj);
356                                 }
357
358                                 /* create nodes and interference edges */
359                                 foreach_ir_nodeset(&live_nodes, live, iter) {
360                                         /* create pbqp source node if it dosn't exist */
361                                         if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
362                                                 create_pbqp_node(pbqp_alloc_env, live);
363                                         }
364
365                                         /* no edges to itself */
366                                         if (proj == live) {
367                                                 continue;
368                                         }
369
370                                         insert_ife_edge(pbqp_alloc_env, proj, live);
371                                 }
372                         }
373                 }
374                 else {
375                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
376                                 /* create pbqp source node if it dosn't exist */
377                                 if (get_node(pbqp_inst, get_irn_idx(irn)) == NULL) {
378                                         create_pbqp_node(pbqp_alloc_env, irn);
379                                 }
380
381                                 /* create nodes and interference edges */
382                                 foreach_ir_nodeset(&live_nodes, live, iter) {
383                                         /* create pbqp source node if it dosn't exist */
384                                         if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
385                                                 create_pbqp_node(pbqp_alloc_env, live);
386                                         }
387
388                                         /* no edges to itself */
389                                         if (irn == live) {
390                                                 continue;
391                                         }
392
393                                         /* insert interference edge */
394                                         insert_ife_edge(pbqp_alloc_env, irn, live);
395                                 }
396                         }
397                 }
398
399                 /* get living nodes for next step */
400                 if (!is_Phi(irn)) {
401                         be_liveness_transfer(cls, irn, &live_nodes);
402                 }
403
404 #if USE_BIPARTIT_MATCHING
405                 if (get_irn_mode(irn) == mode_T) {
406
407                         unsigned clique_size = 0;
408                         unsigned n_alloc = 0;
409                         pbqp_node *clique[cls->n_regs];
410                         bipartite_t *bp = bipartite_new(cls->n_regs, cls->n_regs);
411
412                         /* add all proj after a perm to clique */
413                         const ir_edge_t *edge;
414                         foreach_out_edge(irn, edge) {
415                                 ir_node *proj = get_edge_src_irn(edge);
416
417                                 /* ignore node if it is not necessary for register allocation */
418                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
419                                         continue;
420
421                                 /* insert pbqp node into temp rpeo list of this block */
422                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(proj)));
423
424                                 if(is_Perm_Proj(proj)) {
425                                         /* add proj to clique */
426                                         pbqp_node *clique_member = get_node(pbqp_inst,proj->node_idx);
427                                         clique[clique_size] = clique_member;
428                                         vector *costs = clique_member->costs;
429                                         unsigned idx = 0;
430                                         for(idx = 0; idx < costs->len; idx++) {
431                                                 if(costs->entries[idx].data != INF_COSTS) {
432                                                         bipartite_add(bp, clique_size, idx);
433                                                 }
434                                         }
435
436                                         /* increase node counter */
437                                         clique_size++;
438                                         n_alloc++;
439                                 }
440                         }
441
442                         if(clique_size > 0) {
443                                 plist_element_t *listElement;
444                                 foreach_plist(temp_list, listElement) {
445                                         pbqp_node       *clique_candidate       = listElement->data;
446                                         unsigned         idx                            = 0;
447                                         bool             isMember                       = true;
448
449                                         /* clique size not bigger then register class size */
450                                         if(clique_size >= cls->n_regs) break;
451
452                                         for(idx = 0; idx < clique_size; idx++) {
453                                                 pbqp_node *member = clique[idx];
454
455                                                 if(member == clique_candidate) {
456                                                         isMember = false;
457                                                         break;
458                                                 }
459
460                                                 if(get_edge(pbqp_inst, member->index, clique_candidate->index) == NULL && get_edge(pbqp_inst, clique_candidate->index, member->index) == NULL) {
461                                                         isMember = false;
462                                                         break;
463                                                 }
464                                         }
465
466                                         /* goto next list element if current node is not a member of the clique */
467                                         if(!isMember) { continue; }
468
469                                         /* add candidate to clique */
470                                         clique[clique_size] = clique_candidate;
471
472                                         vector *costs = clique_candidate->costs;
473                                         for(idx = 0; idx < costs->len; idx++) {
474                                                 if(costs->entries[idx].data != INF_COSTS) {
475                                                         bipartite_add(bp, clique_size, idx);
476                                                 }
477                                         }
478
479                                         /* increase node counter */
480                                         clique_size++;
481                                         }
482                                 }
483
484                         /* solve bipartite matching */
485                         bipartite_matching(bp, assignment);
486
487                         /* assign colors */
488                         unsigned nodeIdx = 0;
489                         for(nodeIdx = 0; nodeIdx < clique_size; nodeIdx++) {
490                                 vector *costs = clique[nodeIdx]->costs;
491                                 int idx;
492                                 for(idx = 0; idx < (int)costs->len; idx++) {
493                                         if(assignment[nodeIdx] != idx) {
494                                                 costs->entries[idx].data = INF_COSTS;
495                                         }
496                                 }
497                                 assert(assignment[nodeIdx] >= 0 && "there must have been a register assigned (node not register pressure faithful?)");
498                         }
499
500                         /* free memory */
501                         bipartite_free(bp);
502                 }
503                 else {
504                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
505                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
506                         }
507                 }
508 #else
509                 /* order nodes for perfect elimination order */
510                 if (get_irn_mode(irn) == mode_T) {
511                         bool allHaveIFEdges = true;
512
513                         const ir_edge_t *edge;
514                         foreach_out_edge(irn, edge) {
515                                 ir_node *proj = get_edge_src_irn(edge);
516                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
517                                         continue;
518
519                                 /* insert proj node into priority queue (descending by the number of interference edges) */
520                                 if (get_free_regs(restr_nodes, cls, proj) <= 4) {
521                                         pqueue_put(restr_nodes_queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
522                                 }
523                                 else {
524                                         pqueue_put(queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
525                                 }
526
527                                 /* skip last step if there is no last_element */
528                                 if(last_element == NULL)
529                                         continue;
530
531                                 /* check if proj has an if edge to last_element (at this time pbqp contains only if edges) */
532                                 if(get_edge(pbqp_inst, proj->node_idx, last_element->node_idx) == NULL && get_edge(pbqp_inst, last_element->node_idx, proj->node_idx) == NULL) {
533                                         allHaveIFEdges = false; /* there is no if edge between proj and last_element */
534                                 }
535                         }
536
537                         if(last_element != NULL && allHaveIFEdges) {
538                                 if (get_free_regs(restr_nodes, cls, last_element) <= 4) {
539                                         pqueue_put(restr_nodes_queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
540                                 }
541                                 else {
542                                         pqueue_put(queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
543                                 }
544                                 plist_erase(temp_list, plist_find_value(temp_list, get_node(pbqp_inst, last_element->node_idx)));
545                                 last_element = NULL;
546                         }
547
548                         /* first insert all restricted proj nodes */
549                         while (!pqueue_empty(restr_nodes_queue)) {
550                                 plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
551                         }
552
553                         /* insert proj nodes descending by their number of interference edges */
554                         while (!pqueue_empty(queue)) {
555                                 plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
556                         }
557
558                         /* invert sorted list */
559                         foreach_plist(sorted_list, el) {
560                                 plist_insert_front(temp_list, el->data);
561                         }
562
563                         plist_clear(sorted_list);
564
565                 }
566                 else {
567                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
568                                 // remember last colorable node
569                                 last_element = irn;
570                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
571                         }
572                         else {
573                                 // node not colorable, so ignore it
574                                 last_element = NULL;
575                         }
576                 }
577 #endif
578         }
579
580         /* add the temp rpeo list of this block to the global reverse perfect elimination order list*/
581         foreach_plist(temp_list, el) {
582                 plist_insert_back(rpeo, el->data);
583         }
584
585         /* free reserved memory */
586         ir_nodeset_destroy(&live_nodes);
587         plist_free(temp_list);
588 #if USE_BIPARTIT_MATCHING
589 #else
590         plist_free(sorted_list);
591         del_pqueue(queue);
592         del_pqueue(restr_nodes_queue);
593 #endif
594 }
595
596 static void insert_perms(ir_node *block, void *data)
597 {
598         /*
599          * Start silent in the start block.
600          * The silence remains until the first barrier is seen.
601          * Each other block is begun loud.
602          */
603         be_chordal_env_t *env    = data;
604         ir_node          *irn;
605         int               silent = block == get_irg_start_block(get_irn_irg(block));
606
607         /*
608          * If the block is the start block search the barrier and
609          * start handling constraints from there.
610          */
611         for (irn = sched_first(block); !sched_is_end(irn);) {
612                 int silent_old = silent;        /* store old silent value */
613                 if (be_is_Barrier(irn))
614                         silent = !silent;               /* toggle silent flag */
615
616                 be_insn_t *insn         = chordal_scan_insn(env, irn);
617                 irn                                     = insn->next_insn;
618
619                 if (silent_old)
620                         continue;
621
622                 if (!insn->has_constraints)
623                         continue;
624
625                 pre_process_constraints(env, &insn);
626         }
627 }
628
629 static void be_pbqp_coloring(be_chordal_env_t *env)
630 {
631         ir_graph                        *irg                    = env->irg;
632         const arch_register_class_t *cls                        = env->cls;
633         be_lv_t                                         *lv                             = NULL;
634         plist_element_t                         *element                = NULL;
635         unsigned                                         colors_n               = arch_register_class_n_regs(cls);
636         be_pbqp_alloc_env_t              pbqp_alloc_env;
637         unsigned                                         row, col;
638
639
640 #if TIMER
641         ir_timer_t *t_ra_pbqp_alloc_create     = ir_timer_new();
642         ir_timer_t *t_ra_pbqp_alloc_solve      = ir_timer_new();
643         ir_timer_t *t_ra_pbqp_alloc_create_aff = ir_timer_new();
644
645         printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg)));
646 #endif
647         lv = be_assure_liveness(irg);
648         be_liveness_assure_sets(lv);
649         be_liveness_assure_chk(lv);
650
651         /* insert perms */
652         assure_doms(irg);
653         dom_tree_walk_irg(irg, insert_perms, NULL, env);
654
655         /* dump graph after inserting perms */
656         if (env->opts->dump_flags & BE_CH_DUMP_CONSTR) {
657                 char buf[256];
658                 snprintf(buf, sizeof(buf), "-%s-constr", cls->name);
659                 dump_ir_graph(irg, buf);
660         }
661
662
663         /* initialize pbqp allocation data structure */
664         pbqp_alloc_env.pbqp_inst    = alloc_pbqp(get_irg_last_idx(irg));                /* initialize pbqp instance */
665         pbqp_alloc_env.cls          = cls;
666         pbqp_alloc_env.irg          = irg;
667         pbqp_alloc_env.lv           = lv;
668         pbqp_alloc_env.ignored_regs = bitset_malloc(colors_n);
669         pbqp_alloc_env.rpeo                     = plist_new();
670         pbqp_alloc_env.restr_nodes  = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
671         pbqp_alloc_env.ife_edge_num = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
672         pbqp_alloc_env.env                      = env;
673         be_put_ignore_regs(irg, cls, pbqp_alloc_env.ignored_regs);                              /* get ignored registers */
674
675
676         /* create costs matrix template for interference edges */
677         struct pbqp_matrix *ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
678         /* set costs */
679         for (row = 0, col=0; row < colors_n; row++, col++)
680                 pbqp_matrix_set(ife_matrix, row, col, INF_COSTS);
681
682         pbqp_alloc_env.ife_matrix_template = ife_matrix;
683
684
685         if (!use_exec_freq) {
686                 /* create costs matrix template for affinity edges */
687                 struct pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
688                 /* set costs */
689                 for (row = 0; row < colors_n; row++) {
690                         for (col = 0; col < colors_n; col++) {
691                                 if (row != col)
692                                         pbqp_matrix_set(afe_matrix, row, col, 2);
693                         }
694                 }
695                 pbqp_alloc_env.aff_matrix_template = afe_matrix;
696         }
697
698
699         /* create pbqp instance */
700 #if TIMER
701         ir_timer_reset_and_start(t_ra_pbqp_alloc_create);
702 #endif
703         assure_doms(irg);
704         dom_tree_walk_irg(irg, create_pbqp_coloring_instance , NULL, &pbqp_alloc_env);
705 #if TIMER
706         ir_timer_stop(t_ra_pbqp_alloc_create);
707 #endif
708
709
710         /* set up affinity edges */
711 #if TIMER
712         ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff);
713 #endif
714         foreach_plist(pbqp_alloc_env.rpeo, element) {
715                 pbqp_node       *node   = element->data;
716                 ir_node         *irn    = get_idx_irn(irg, node->index);
717
718                 create_affinity_edges(irn, &pbqp_alloc_env);
719         }
720 #if TIMER
721         ir_timer_stop(t_ra_pbqp_alloc_create_aff);
722 #endif
723
724
725 #if KAPS_DUMP
726         // dump graph before solving pbqp
727         FILE *file_before = my_open(env, "", "-pbqp_coloring.html");
728         set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before);
729 #endif
730
731         /* print out reverse perfect eleminiation order */
732 #if PRINT_RPEO
733         plist_element_t *elements;
734         foreach_plist(pbqp_alloc_env.rpeo, elements) {
735                 pbqp_node *node                    = elements->data;
736                 printf(" %d(%lu);", node->index, get_idx_irn(irg, node->index)->node_nr);
737         }
738         printf("\n");
739 #endif
740
741         /* solve pbqp instance */
742 #if TIMER
743         ir_timer_reset_and_start(t_ra_pbqp_alloc_solve);
744 #endif
745         if(use_late_decision) {
746                 solve_pbqp_heuristical_co_ld(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
747         }
748         else {
749                 solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
750         }
751 #if TIMER
752         ir_timer_stop(t_ra_pbqp_alloc_solve);
753 #endif
754
755
756         num solution = get_solution(pbqp_alloc_env.pbqp_inst);
757         assert(solution != INF_COSTS && "No PBQP solution found");
758
759
760         /* assign colors */
761         foreach_plist(pbqp_alloc_env.rpeo, element) {
762                 pbqp_node                               *node   = element->data;
763                 ir_node                                 *irn    = get_idx_irn(irg, node->index);
764                 num                                      color  = get_node_solution(pbqp_alloc_env.pbqp_inst, node->index);
765                 const arch_register_t   *reg    = arch_register_for_index(cls, color);
766
767                 arch_set_irn_register(irn, reg);
768         }
769
770
771 #if TIMER
772         printf("PBQP alloc create:     %10.3lf msec\n",
773                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0);
774         printf("PBQP alloc solve:      %10.3lf msec\n",
775                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0);
776         printf("PBQP alloc create aff: %10.3lf msec\n",
777                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0);
778 #endif
779
780
781         /* free reserved memory */
782 #if KAPS_DUMP
783         fclose(file_before);
784 #endif
785         bitset_free(pbqp_alloc_env.ignored_regs);
786         free_pbqp(pbqp_alloc_env.pbqp_inst);
787         plist_free(pbqp_alloc_env.rpeo);
788         xfree(pbqp_alloc_env.restr_nodes);
789         xfree(pbqp_alloc_env.ife_edge_num);
790 }
791
792
793 /**
794  * Initializes this module.
795  */
796 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pbqp_coloring);
797 void be_init_pbqp_coloring(void)
798 {
799         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
800         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
801         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
802         lc_opt_entry_t *coloring_grp = lc_opt_get_grp(chordal_grp, "coloring");
803         lc_opt_entry_t *pbqp_grp = lc_opt_get_grp(coloring_grp, "pbqp");
804
805         static be_ra_chordal_coloring_t coloring = {
806                 be_pbqp_coloring
807         };
808
809         lc_opt_add_table(pbqp_grp, options);
810         be_register_chordal_coloring("pbqp", &coloring);
811 }
812
813 #endif