ia32: improve Test peephole optimisation
[libfirm] / ir / be / bepbqpcoloring.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       PBQP based register allocation.
23  * @author      Thomas Bersch
24  * @date        27.11.2009
25  * @version     $Id: bechordal.c 26750 2009-11-27 09:37:43Z bersch $
26  */
27
28 /* miscellaneous includes */
29 #include "config.h"
30
31 #include "debug.h"
32 #include "error.h"
33
34 #include "irdom.h"
35 #include "irdump.h"
36 #include "iredges_t.h"
37 #include "irprintf.h"
38 #include "irgwalk.h"
39 #include "irtools.h"
40 #include "time.h"
41
42 /* libfirm/ir/adt includes */
43 #include "bipartite.h"
44
45 /* libfirm/ir/be includes */
46 #include "bearch.h"
47 #include "beirg.h"
48 #include "besched.h"
49 #include "bemodule.h"
50 #include "bechordal_common.h"
51 #include "bechordal.h"
52 #include "bechordal_t.h"
53 #include "beinsn_t.h"
54 #include "benode.h"
55 #include "belive.h"
56 #include "belive_t.h"
57 #include "beutil.h"
58 #include "plist.h"
59 #include "pqueue.h"
60 #include "becopyopt.h"
61
62 /* pbqp includes */
63 #include "kaps.h"
64 #include "matrix.h"
65 #include "vector.h"
66 #include "vector_t.h"
67 #include "heuristical_co.h"
68 #include "heuristical_co_ld.h"
69 #include "pbqp_t.h"
70 #include "html_dumper.h"
71 #include "pbqp_node_t.h"
72 #include "pbqp_node.h"
73 #include "pbqp_edge_t.h"
74
/* Compile-time switches for this allocator (0 = off, non-zero = on). */
/* TIMER: guards the timer creation and the progress printf in be_pbqp_coloring(). */
75 #define TIMER                 0
/* PRINT_RPEO: not referenced in the visible part of this file; presumably
 * dumps the elimination order -- TODO confirm. */
76 #define PRINT_RPEO            0
/* USE_BIPARTIT_MATCHING: selects the bipartite-matching variant of the node
 * ordering code in create_pbqp_coloring_instance() instead of the
 * priority-queue variant. */
77 #define USE_BIPARTIT_MATCHING 0
/* DO_USEFUL_OPT: enables the shortcut in insert_ife_edge()/insert_afe_edge()
 * that folds an edge into the cost vectors when a node has exactly one free
 * register. */
78 #define DO_USEFUL_OPT         1
79
80
/* Runtime flag bound to the "exec_freq" option: when set, affinity costs are
 * taken from the block execution frequency (see insert_afe_edge()). */
81 static int use_exec_freq     = true;
/* Runtime flag bound to the "late_decision" option; its consumer is not
 * part of the visible source. */
82 static int use_late_decision = false;
83
/**
 * State shared by all walkers that build the PBQP instance for one
 * register class of one graph.
 */
84 typedef struct be_pbqp_alloc_env_t {
85         pbqp_t                      *pbqp_inst;         /**< PBQP instance for register allocation */
86         ir_graph                    *irg;               /**< The graph under examination. */
87         const arch_register_class_t *cls;               /**< Current processed register class */
88         be_lv_t                     *lv;                /**< Liveness information of the graph */
89         bitset_t                    *allocatable_regs;  /**< Allocatable registers of cls; registers not set here get INF_COSTS */
90         pbqp_matrix_t               *ife_matrix_template; /**< Cost matrix copied for every interference edge */
91         pbqp_matrix_t               *aff_matrix_template; /**< Cost matrix used for affinity edges when use_exec_freq is off */
92         plist_t                     *rpeo;              /**< PBQP nodes of all blocks in the order collected by create_pbqp_coloring_instance() */
93         unsigned                    *restr_nodes;       /**< Per node index: number of registers the node may not use */
94         unsigned                    *ife_edge_num;      /**< Per node index: interference edge counter */
95         be_chordal_env_t            *env;               /**< Chordal environment, handed to create_borders() */
96 } be_pbqp_alloc_env_t;
97
98
/* True if irn is a Phi whose mode is a data mode. */
99 #define is_Reg_Phi(irn)                                        (is_Phi(irn) && mode_is_data(get_irn_mode(irn)))
/* For a Proj of a Perm: the Perm operand at the position given by the Proj number. */
100 #define get_Perm_src(irn)                                      (get_irn_n(get_Proj_pred(irn), get_Proj_proj(irn)))
/* True if irn is a Proj whose predecessor is a be_Perm. */
101 #define is_Perm_Proj(irn)                                      (is_Proj(irn) && be_is_Perm(get_Proj_pred(irn)))
/* Adds edge costs between the PBQP nodes identified by the node indices of
 * src_node and trg_node, using a fresh copy of template_matrix. */
102 #define insert_edge(pbqp, src_node, trg_node, template_matrix) (add_edge_costs(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node), pbqp_matrix_copy(pbqp, template_matrix)))
/* Number of registers of cls minus the constrained-register count recorded
 * for irn in restr_nodes (filled by create_pbqp_node()). */
103 #define get_free_regs(restr_nodes, cls, irn)                   (arch_register_class_n_regs(cls) - restr_nodes[get_irn_idx(irn)])
104
105 static inline int is_2addr_code(const arch_register_req_t *req)
106 {
107         return (req->type & arch_register_req_type_should_be_same) != 0;
108 }
109
/* Option table: binds the option names "exec_freq" and "late_decision" to the
 * static flags use_exec_freq and use_late_decision. */
110 static const lc_opt_table_entry_t options[] = {
111         LC_OPT_ENT_BOOL("exec_freq", "use exec_freq",  &use_exec_freq),
112         LC_OPT_ENT_BOOL("late_decision", "use late decision for register allocation",  &use_late_decision),
113         LC_OPT_LAST
114 };
115
116 #if KAPS_DUMP
117 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
118 {
119         FILE       *result;
120         char        buf[1024];
121         size_t      i;
122         size_t      n;
123         char       *tu_name;
124         const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
125
126         n = strlen(cup_name);
127         tu_name = XMALLOCN(char, n + 1);
128         strcpy(tu_name, cup_name);
129         for (i = 0; i < n; ++i)
130                 if (tu_name[i] == '.')
131                         tu_name[i] = '_';
132
133         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
134         xfree(tu_name);
135         result = fopen(buf, "wt");
136         if (result == NULL) {
137                 panic("Couldn't open '%s' for writing.", buf);
138         }
139
140         return result;
141 }
142 #endif
143
144
145 static void create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn)
146 {
147         const arch_register_class_t *cls = pbqp_alloc_env->cls;
148         pbqp_t   *pbqp_inst              = pbqp_alloc_env->pbqp_inst;
149         bitset_t *allocatable_regs       = pbqp_alloc_env->allocatable_regs;
150         unsigned  colors_n               = arch_register_class_n_regs(cls);
151         unsigned  cntConstrains          = 0;
152
153         /* create costs vector depending on register constrains */
154         vector_t *costs_vector = vector_alloc(pbqp_inst, colors_n);
155
156         /* set costs depending on register constrains */
157         unsigned idx;
158         for (idx = 0; idx < colors_n; idx++) {
159                 const arch_register_req_t *req = arch_get_irn_register_req(irn);
160                 const arch_register_t     *reg = arch_register_for_index(cls, idx);
161                 if (!bitset_is_set(allocatable_regs, idx)
162                     || !arch_reg_is_allocatable(req, reg)) {
163                         /* constrained */
164                         vector_set(costs_vector, idx, INF_COSTS);
165                         cntConstrains++;
166                 }
167         }
168
169         /* add vector to pbqp node */
170         add_node_costs(pbqp_inst, get_irn_idx(irn), costs_vector);
171         pbqp_alloc_env->restr_nodes[get_irn_idx(irn)] = cntConstrains;
172 }
173
174 static void insert_ife_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node)
175 {
176         pbqp_t                      *pbqp                = pbqp_alloc_env->pbqp_inst;
177         const arch_register_class_t *cls                 = pbqp_alloc_env->cls;
178         pbqp_matrix_t               *ife_matrix_template = pbqp_alloc_env->ife_matrix_template;
179         unsigned                    *restr_nodes         = pbqp_alloc_env->restr_nodes;
180
181         if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
182
183                 /* increase ife edge counter */
184                 pbqp_alloc_env->ife_edge_num[get_irn_idx(src_node)]++;
185                 pbqp_alloc_env->ife_edge_num[get_irn_idx(trg_node)]++;
186
187 #if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING
188                 /* do useful optimization to speed up pbqp solving (we can do this because we know our matrix) */
189                 if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
190                         assert(vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs) !=
191                                vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs) &&
192                                "Interfering nodes must not have the same register!");
193                         return;
194                 }
195                 if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
196                         if (get_free_regs(restr_nodes, cls, src_node) == 1) {
197                                 unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
198                                 vector_set(get_node(pbqp, get_irn_idx(trg_node))->costs, idx, INF_COSTS);
199                         }
200                         else {
201                                 unsigned idx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
202                                 vector_set(get_node(pbqp, get_irn_idx(src_node))->costs, idx, INF_COSTS);
203                         }
204                         return;
205                 }
206 #endif
207                 /* insert interference edge */
208                 insert_edge(pbqp, src_node, trg_node, ife_matrix_template);
209         }
210 }
211
212 static void insert_afe_edge(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *src_node, ir_node *trg_node, int pos)
213 {
214         pbqp_t                      *pbqp        = pbqp_alloc_env->pbqp_inst;
215         const arch_register_class_t *cls         = pbqp_alloc_env->cls;
216         unsigned                    *restr_nodes = pbqp_alloc_env->restr_nodes;
217         pbqp_matrix_t               *afe_matrix  = pbqp_matrix_alloc(pbqp, arch_register_class_n_regs(cls), arch_register_class_n_regs(cls));
218         unsigned                     colors_n    = arch_register_class_n_regs(cls);
219
220         if (get_edge(pbqp, get_irn_idx(src_node), get_irn_idx(trg_node)) == NULL) {
221                 if (use_exec_freq) {
222                         /* get exec_freq for copy_block */
223                         ir_node       *root_bl   = get_nodes_block(src_node);
224                         ir_node       *copy_bl   = is_Phi(src_node) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
225                         ir_exec_freq  *exec_freq = be_get_irg_exec_freq(pbqp_alloc_env->irg);
226                         unsigned long  res       = get_block_execfreq_ulong(exec_freq, copy_bl);
227
228                         /* create afe-matrix */
229                         unsigned row, col;
230                         for (row = 0; row < colors_n; row++) {
231                                 for (col = 0; col < colors_n; col++) {
232                                         if (row != col)
233                                                 pbqp_matrix_set(afe_matrix, row, col, (num)res);
234                                 }
235                         }
236                 }
237                 else {
238                         afe_matrix = pbqp_alloc_env->aff_matrix_template;
239                 }
240 #if DO_USEFUL_OPT || USE_BIPARTIT_MATCHING
241                 /* do useful optimization to speed up pbqp solving */
242                 if (get_free_regs(restr_nodes, cls, src_node) == 1 && get_free_regs(restr_nodes, cls, trg_node) == 1) {
243                         return;
244                 }
245                 if (get_free_regs(restr_nodes, cls, src_node) == 1 || get_free_regs(restr_nodes, cls, trg_node) == 1) {
246                         if (get_free_regs(restr_nodes, cls, src_node) == 1) {
247                                 unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(src_node))->costs);
248                                 vector_add_matrix_col(get_node(pbqp, get_irn_idx(trg_node))->costs, afe_matrix, regIdx);
249                         }
250                         else {
251                                 unsigned regIdx = vector_get_min_index(get_node(pbqp, get_irn_idx(trg_node))->costs);
252                                 vector_add_matrix_col(get_node(pbqp, get_irn_idx(src_node))->costs, afe_matrix, regIdx);
253                         }
254                         return;
255                 }
256 #endif
257                 /* insert interference edge */
258                 insert_edge(pbqp, src_node, trg_node, afe_matrix);
259         }
260 }
261
262 static void create_affinity_edges(ir_node *irn, void *env)
263 {
264         be_pbqp_alloc_env_t         *pbqp_alloc_env = (be_pbqp_alloc_env_t*)env;
265         const arch_register_class_t *cls            = pbqp_alloc_env->cls;
266         const arch_register_req_t   *req            = arch_get_irn_register_req(irn);
267         unsigned                     pos;
268         unsigned                     max;
269
270         if (is_Reg_Phi(irn)) { /* Phis */
271                 for (pos = 0, max = get_irn_arity(irn); pos < max; ++pos) {
272                         ir_node *arg = get_irn_n(irn, pos);
273
274                         if (!arch_irn_consider_in_reg_alloc(cls, arg))
275                                 continue;
276
277                         /* no edges to itself */
278                         if (irn == arg) {
279                                 continue;
280                         }
281
282                         insert_afe_edge(pbqp_alloc_env, irn, arg, pos);
283                 }
284         }
285         else if (is_Perm_Proj(irn)) { /* Perms */
286                 ir_node *arg = get_Perm_src(irn);
287                 if (!arch_irn_consider_in_reg_alloc(cls, arg))
288                         return;
289
290                 insert_afe_edge(pbqp_alloc_env, irn, arg, -1);
291         }
292         else { /* 2-address code */
293                 if (is_2addr_code(req)) {
294                         const unsigned other = req->other_same;
295                         int            i;
296
297                         for (i = 0; 1U << i <= other; ++i) {
298                                 if (other & (1U << i)) {
299                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
300                                         if (!arch_irn_consider_in_reg_alloc(cls, other))
301                                                 continue;
302
303                                         /* no edges to itself */
304                                         if (irn == other) {
305                                                 continue;
306                                         }
307
308                                         insert_afe_edge(pbqp_alloc_env, irn, other, i);
309                                 }
310                         }
311                 }
312         }
313 }
314
/**
 * Block walker callback: builds the PBQP nodes and interference edges of one
 * block and appends the block's nodes to the global elimination order list.
 *
 * The block's schedule is walked backwards, starting with the nodes live at
 * the end of the block. For every scheduled node (or, for mode_T nodes, every
 * Proj of it) that takes part in register allocation for the current class,
 * a PBQP node is created if missing and an interference with every currently
 * live node is registered. The live set is then updated for the next
 * (earlier) node; Phis do not update it.
 *
 * In parallel a per-block list (temp_list) of PBQP nodes is built:
 *  - USE_BIPARTIT_MATCHING: the Projs are pushed to the list; Projs of a Perm
 *    form a clique that is extended by list members connected to all clique
 *    members, a bipartite matching assigns registers to the clique and all
 *    other registers of the clique members get INF_COSTS.
 *  - otherwise: the Projs of a mode_T node are ordered with two priority
 *    queues (nodes with at most 4 free registers and all others), both keyed
 *    by the interference edge counter; other nodes are pushed directly.
 *
 * Finally temp_list is appended to pbqp_alloc_env->rpeo.
 *
 * @param block  the block to process
 * @param data   the be_pbqp_alloc_env_t of the current allocation run
 */
315 static void create_pbqp_coloring_instance(ir_node *block, void *data)
316 {
317         be_pbqp_alloc_env_t         *pbqp_alloc_env     = (be_pbqp_alloc_env_t*)data;
318         be_lv_t                     *lv                 = pbqp_alloc_env->lv;
319         const arch_register_class_t *cls                = pbqp_alloc_env->cls;
320         plist_t                     *rpeo               = pbqp_alloc_env->rpeo;
321         pbqp_t                      *pbqp_inst          = pbqp_alloc_env->pbqp_inst;
322         plist_t                     *temp_list          = plist_new();
323         plist_element_t             *el;
324         ir_node                     *irn;
325         ir_nodeset_t                 live_nodes;
326 #if USE_BIPARTIT_MATCHING
327         int                         *assignment         = ALLOCAN(int, cls->n_regs);
328 #else
329         unsigned                    *restr_nodes        = pbqp_alloc_env->restr_nodes;
330         pqueue_t                    *restr_nodes_queue  = new_pqueue();
331         pqueue_t                    *queue              = new_pqueue();
332         plist_t                     *sorted_list        = plist_new();
333         ir_node                     *last_element       = NULL;
334 #endif
335
336         /* first, determine the pressure */
337         /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */
338         create_borders(block, pbqp_alloc_env->env);
339
340         /* calculate living nodes for the first step */
341         ir_nodeset_init(&live_nodes);
342         be_liveness_end_of_block(lv, cls, block, &live_nodes);
343
344         /* create pbqp nodes, interference edges and reverse perfect elimination order */
345         sched_foreach_reverse(block, irn) {
346                 ir_node               *live;
347                 ir_nodeset_iterator_t  iter;
348
                /* a mode_T node defines its values through its Projs */
349                 if (get_irn_mode(irn) == mode_T) {
350                         const ir_edge_t *edge;
351                         foreach_out_edge(irn, edge) {
352                                 ir_node *proj = get_edge_src_irn(edge);
353                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
354                                         continue;
355
356                                 /* create pbqp source node if it doesn't exist */
357                                 if (get_node(pbqp_inst, get_irn_idx(proj)) == NULL) {
358                                         create_pbqp_node(pbqp_alloc_env, proj);
359                                 }
360
361                                 /* create nodes and interference edges */
362                                 foreach_ir_nodeset(&live_nodes, live, iter) {
363                                         /* create pbqp source node if it doesn't exist */
364                                         if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
365                                                 create_pbqp_node(pbqp_alloc_env, live);
366                                         }
367
368                                         /* no edges to itself */
369                                         if (proj == live) {
370                                                 continue;
371                                         }
372
373                                         insert_ife_edge(pbqp_alloc_env, proj, live);
374                                 }
375                         }
376                 }
377                 else {
378                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
379                                 /* create pbqp source node if it doesn't exist */
380                                 if (get_node(pbqp_inst, get_irn_idx(irn)) == NULL) {
381                                         create_pbqp_node(pbqp_alloc_env, irn);
382                                 }
383
384                                 /* create nodes and interference edges */
385                                 foreach_ir_nodeset(&live_nodes, live, iter) {
386                                         /* create pbqp source node if it doesn't exist */
387                                         if (get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
388                                                 create_pbqp_node(pbqp_alloc_env, live);
389                                         }
390
391                                         /* no edges to itself */
392                                         if (irn == live) {
393                                                 continue;
394                                         }
395
396                                         /* insert interference edge */
397                                         insert_ife_edge(pbqp_alloc_env, irn, live);
398                                 }
399                         }
400                 }
401
402                 /* get living nodes for next step */
403                 if (!is_Phi(irn)) {
404                         be_liveness_transfer(cls, irn, &live_nodes);
405                 }
406
407 #if USE_BIPARTIT_MATCHING
408                 if (get_irn_mode(irn) == mode_T) {
409                         unsigned     clique_size         = 0;
410                         unsigned     n_alloc             = 0;
411                         pbqp_node   *clique[cls->n_regs];
412                         bipartite_t *bp                  = bipartite_new(cls->n_regs, cls->n_regs);
413
414                         /* add all proj after a perm to clique */
415                         const ir_edge_t *edge;
416                         foreach_out_edge(irn, edge) {
417                                 ir_node *proj = get_edge_src_irn(edge);
418
419                                 /* ignore node if it is not necessary for register allocation */
420                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
421                                         continue;
422
423                                 /* insert pbqp node into temp rpeo list of this block */
424                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(proj)));
425
426                                 if(is_Perm_Proj(proj)) {
427                                         /* add proj to clique */
428                                         pbqp_node *clique_member = get_node(pbqp_inst,proj->node_idx);
429                                         vector    *costs         = clique_member->costs;
430                                         unsigned   idx           = 0;
431
432                                         clique[clique_size] = clique_member;
433
                                        /* every register without infinite costs is a matching candidate */
434                                         for(idx = 0; idx < costs->len; idx++) {
435                                                 if(costs->entries[idx].data != INF_COSTS) {
436                                                         bipartite_add(bp, clique_size, idx);
437                                                 }
438                                         }
439
440                                         /* increase node counter */
441                                         clique_size++;
442                                         n_alloc++;
443                                 }
444                         }
445
                        /* extend the clique by list members connected to all current members */
446                         if(clique_size > 0) {
447                                 plist_element_t *listElement;
448                                 foreach_plist(temp_list, listElement) {
449                                         pbqp_node *clique_candidate  = listElement->data;
450                                         unsigned   idx               = 0;
451                                         bool       isMember          = true;
452
453                                         /* clique size not bigger than register class size */
454                                         if(clique_size >= cls->n_regs) break;
455
456                                         for(idx = 0; idx < clique_size; idx++) {
457                                                 pbqp_node *member = clique[idx];
458
                                                /* candidate is already part of the clique */
459                                                 if(member == clique_candidate) {
460                                                         isMember = false;
461                                                         break;
462                                                 }
463
                                                /* no edge in either direction: candidate does not interfere with this member */
464                                                 if(get_edge(pbqp_inst, member->index, clique_candidate->index) == NULL && get_edge(pbqp_inst, clique_candidate->index, member->index) == NULL) {
465                                                         isMember = false;
466                                                         break;
467                                                 }
468                                         }
469
470                                         /* goto next list element if current node is not a member of the clique */
471                                         if(!isMember) { continue; }
472
473                                         /* add candidate to clique */
474                                         clique[clique_size] = clique_candidate;
475
476                                         vector *costs = clique_candidate->costs;
477                                         for(idx = 0; idx < costs->len; idx++) {
478                                                 if(costs->entries[idx].data != INF_COSTS) {
479                                                         bipartite_add(bp, clique_size, idx);
480                                                 }
481                                         }
482
483                                         /* increase node counter */
484                                         clique_size++;
485                                 }
486                         }
487
488                         /* solve bipartite matching */
489                         bipartite_matching(bp, assignment);
490
491                         /* assign colors */
492                         unsigned nodeIdx = 0;
493                         for(nodeIdx = 0; nodeIdx < clique_size; nodeIdx++) {
494                                 vector *costs = clique[nodeIdx]->costs;
495                                 int     idx;
                                /* forbid every register except the one chosen by the matching */
496                                 for(idx = 0; idx < (int)costs->len; idx++) {
497                                         if(assignment[nodeIdx] != idx) {
498                                                 costs->entries[idx].data = INF_COSTS;
499                                         }
500                                 }
501                                 assert(assignment[nodeIdx] >= 0 && "there must have been a register assigned (node not register pressure faithful?)");
502                         }
503
504                         /* free memory */
505                         bipartite_free(bp);
506                 }
507                 else {
508                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
509                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
510                         }
511                 }
512 #else
513                 /* order nodes for perfect elimination order */
514                 if (get_irn_mode(irn) == mode_T) {
515                         bool             allHaveIFEdges = true;
516                         const ir_edge_t *edge;
517
518                         foreach_out_edge(irn, edge) {
519                                 ir_node *proj = get_edge_src_irn(edge);
520                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
521                                         continue;
522
523                                 /* insert proj node into priority queue (descending by the number of interference edges) */
                                /* nodes with at most 4 free registers go to the queue of restricted nodes */
524                                 if (get_free_regs(restr_nodes, cls, proj) <= 4) {
525                                         pqueue_put(restr_nodes_queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
526                                 }
527                                 else {
528                                         pqueue_put(queue, proj, pbqp_alloc_env->ife_edge_num[get_irn_idx(proj)]);
529                                 }
530
531                                 /* skip last step if there is no last_element */
532                                 if(last_element == NULL)
533                                         continue;
534
535                                 /* check if proj has an if edge to last_element (at this time pbqp contains only if edges) */
536                                 if(get_edge(pbqp_inst, proj->node_idx, last_element->node_idx) == NULL && get_edge(pbqp_inst, last_element->node_idx, proj->node_idx) == NULL) {
537                                         allHaveIFEdges = false; /* there is no if edge between proj and last_element */
538                                 }
539                         }
540
                        /* last_element interferes with every Proj: take it out of temp_list
                         * and order it together with the Projs through the queues */
541                         if(last_element != NULL && allHaveIFEdges) {
542                                 if (get_free_regs(restr_nodes, cls, last_element) <= 4) {
543                                         pqueue_put(restr_nodes_queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
544                                 }
545                                 else {
546                                         pqueue_put(queue, last_element, pbqp_alloc_env->ife_edge_num[get_irn_idx(last_element)]);
547                                 }
548                                 plist_erase(temp_list, plist_find_value(temp_list, get_node(pbqp_inst, last_element->node_idx)));
549                                 last_element = NULL;
550                         }
551
552                         /* first insert all restricted proj nodes */
553                         while (!pqueue_empty(restr_nodes_queue)) {
554                                 ir_node *node = (ir_node*)pqueue_pop_front(restr_nodes_queue);
555                                 plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node)));
556                         }
557
558                         /* insert proj nodes descending by their number of interference edges */
559                         while (!pqueue_empty(queue)) {
560                                 ir_node *node = (ir_node*)pqueue_pop_front(queue);
561                                 plist_insert_front(sorted_list, get_node(pbqp_inst, get_irn_idx(node)));
562                         }
563
564                         /* invert sorted list */
565                         foreach_plist(sorted_list, el) {
566                                 plist_insert_front(temp_list, el->data);
567                         }
568
569                         plist_clear(sorted_list);
570
571                 }
572                 else {
573                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
574                                 // remember last colorable node
575                                 last_element = irn;
576                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
577                         }
578                         else {
579                                 // node not colorable, so ignore it
580                                 last_element = NULL;
581                         }
582                 }
583 #endif
584         }
585
586         /* add the temp rpeo list of this block to the global reverse perfect elimination order list */
587         foreach_plist(temp_list, el) {
588                 plist_insert_back(rpeo, el->data);
589         }
590
591         /* free reserved memory */
592         ir_nodeset_destroy(&live_nodes);
593         plist_free(temp_list);
594 #if USE_BIPARTIT_MATCHING
595 #else
596         plist_free(sorted_list);
597         del_pqueue(queue);
598         del_pqueue(restr_nodes_queue);
599 #endif
600 }
601
602 static void insert_perms(ir_node *block, void *data)
603 {
604         be_chordal_env_t *env    = (be_chordal_env_t*)data;
605         ir_node          *irn;
606
607         for (irn = sched_first(block); !sched_is_end(irn);) {
608                 be_insn_t *insn = chordal_scan_insn(env, irn);
609                 irn             = insn->next_insn;
610
611                 if (!insn->has_constraints)
612                         continue;
613
614                 pre_process_constraints(env, &insn);
615         }
616 }
617
618 static void be_pbqp_coloring(be_chordal_env_t *env)
619 {
620         ir_graph                    *irg            = env->irg;
621         const arch_register_class_t *cls            = env->cls;
622         be_lv_t                     *lv             = NULL;
623         plist_element_t             *element        = NULL;
624         unsigned                     colors_n       = arch_register_class_n_regs(cls);
625         be_pbqp_alloc_env_t          pbqp_alloc_env;
626         unsigned                     col;
627         unsigned                     row;
628         pbqp_matrix_t               *ife_matrix;
629         num                          solution;
630 #if KAPS_DUMP
631         FILE                        *file_before;
632 #endif
633 #if TIMER
634         ir_timer_t *t_ra_pbqp_alloc_create     = ir_timer_new();
635         ir_timer_t *t_ra_pbqp_alloc_solve      = ir_timer_new();
636         ir_timer_t *t_ra_pbqp_alloc_create_aff = ir_timer_new();
637
638         printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg)));
639 #endif
640         lv = be_assure_liveness(irg);
641         be_liveness_assure_sets(lv);
642         be_liveness_assure_chk(lv);
643
644         /* insert perms */
645         assure_doms(irg);
646         dom_tree_walk_irg(irg, insert_perms, NULL, env);
647
648         /* dump graph after inserting perms */
649         if (env->opts->dump_flags & BE_CH_DUMP_CONSTR) {
650                 char buf[256];
651                 snprintf(buf, sizeof(buf), "-%s-constr", cls->name);
652                 dump_ir_graph(irg, buf);
653         }
654
655
656         /* initialize pbqp allocation data structure */
657         pbqp_alloc_env.pbqp_inst        = alloc_pbqp(get_irg_last_idx(irg));  /* initialize pbqp instance */
658         pbqp_alloc_env.cls              = cls;
659         pbqp_alloc_env.irg              = irg;
660         pbqp_alloc_env.lv               = lv;
661         pbqp_alloc_env.allocatable_regs = bitset_malloc(colors_n);
662         pbqp_alloc_env.rpeo             = plist_new();
663         pbqp_alloc_env.restr_nodes      = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
664         pbqp_alloc_env.ife_edge_num     = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
665         pbqp_alloc_env.env              = env;
666         be_put_allocatable_regs(irg, cls, pbqp_alloc_env.allocatable_regs);
667
668
669         /* create costs matrix template for interference edges */
670         ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
671         /* set costs */
672         for (row = 0, col = 0; row < colors_n; row++, col++)
673                 pbqp_matrix_set(ife_matrix, row, col, INF_COSTS);
674
675         pbqp_alloc_env.ife_matrix_template = ife_matrix;
676
677
678         if (!use_exec_freq) {
679                 /* create costs matrix template for affinity edges */
680                 pbqp_matrix_t *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
681                 /* set costs */
682                 for (row = 0; row < colors_n; row++) {
683                         for (col = 0; col < colors_n; col++) {
684                                 if (row != col)
685                                         pbqp_matrix_set(afe_matrix, row, col, 2);
686                         }
687                 }
688                 pbqp_alloc_env.aff_matrix_template = afe_matrix;
689         }
690
691
692         /* create pbqp instance */
693 #if TIMER
694         ir_timer_reset_and_start(t_ra_pbqp_alloc_create);
695 #endif
696         assure_doms(irg);
697         dom_tree_walk_irg(irg, create_pbqp_coloring_instance , NULL, &pbqp_alloc_env);
698 #if TIMER
699         ir_timer_stop(t_ra_pbqp_alloc_create);
700 #endif
701
702
703         /* set up affinity edges */
704 #if TIMER
705         ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff);
706 #endif
707         foreach_plist(pbqp_alloc_env.rpeo, element) {
708                 pbqp_node_t *node = (pbqp_node_t*)element->data;
709                 ir_node     *irn  = get_idx_irn(irg, node->index);
710
711                 create_affinity_edges(irn, &pbqp_alloc_env);
712         }
713 #if TIMER
714         ir_timer_stop(t_ra_pbqp_alloc_create_aff);
715 #endif
716
717
718 #if KAPS_DUMP
719         // dump graph before solving pbqp
720         file_before = my_open(env, "", "-pbqp_coloring.html");
721         set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before);
722 #endif
723
724         /* print out reverse perfect elimination order */
725 #if PRINT_RPEO
726         {
727                 plist_element_t *elements;
728                 foreach_plist(pbqp_alloc_env.rpeo, elements) {
729                         pbqp_node_t *node = elements->data;
730                         printf(" %d(%ld);", node->index, get_idx_irn(irg, node->index)->node_nr);
731                 }
732                 printf("\n");
733         }
734 #endif
735
736         /* solve pbqp instance */
737 #if TIMER
738         ir_timer_reset_and_start(t_ra_pbqp_alloc_solve);
739 #endif
740         if(use_late_decision) {
741                 solve_pbqp_heuristical_co_ld(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
742         }
743         else {
744                 solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
745         }
746 #if TIMER
747         ir_timer_stop(t_ra_pbqp_alloc_solve);
748 #endif
749
750
751         solution = get_solution(pbqp_alloc_env.pbqp_inst);
752         if (solution == INF_COSTS)
753                 panic("No PBQP solution found");
754
755
756         /* assign colors */
757         foreach_plist(pbqp_alloc_env.rpeo, element) {
758                 pbqp_node_t           *node  = (pbqp_node_t*)element->data;
759                 ir_node               *irn   = get_idx_irn(irg, node->index);
760                 num                    color = get_node_solution(pbqp_alloc_env.pbqp_inst, node->index);
761                 const arch_register_t *reg   = arch_register_for_index(cls, color);
762
763                 arch_set_irn_register(irn, reg);
764         }
765
766
767 #if TIMER
768         printf("PBQP alloc create:     %10.3lf msec\n",
769                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0);
770         printf("PBQP alloc solve:      %10.3lf msec\n",
771                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0);
772         printf("PBQP alloc create aff: %10.3lf msec\n",
773                (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0);
774 #endif
775
776
777         /* free reserved memory */
778 #if KAPS_DUMP
779         fclose(file_before);
780 #endif
781         bitset_free(pbqp_alloc_env.allocatable_regs);
782         free_pbqp(pbqp_alloc_env.pbqp_inst);
783         plist_free(pbqp_alloc_env.rpeo);
784         xfree(pbqp_alloc_env.restr_nodes);
785         xfree(pbqp_alloc_env.ife_edge_num);
786 }
787
788
789 /**
790  * Initializes this module.
791  */
792 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pbqp_coloring)
793 void be_init_pbqp_coloring(void)
794 {
795         lc_opt_entry_t *be_grp       = lc_opt_get_grp(firm_opt_get_root(), "be");
796         lc_opt_entry_t *ra_grp       = lc_opt_get_grp(be_grp, "ra");
797         lc_opt_entry_t *chordal_grp  = lc_opt_get_grp(ra_grp, "chordal");
798         lc_opt_entry_t *coloring_grp = lc_opt_get_grp(chordal_grp, "coloring");
799         lc_opt_entry_t *pbqp_grp     = lc_opt_get_grp(coloring_grp, "pbqp");
800
801         static be_ra_chordal_coloring_t coloring = {
802                 be_pbqp_coloring
803         };
804
805         lc_opt_add_table(pbqp_grp, options);
806         be_register_chordal_coloring("pbqp", &coloring);
807 }