Change bechordal_constraints.h to bechordal_common.h
[libfirm] / ir / be / bepbqpcoloring.c
/*
 * bepbqpcoloring.c
 *
 * PBQP based register coloring for the chordal register allocator.
 *
 *  Created on: Nov 11, 2009
 *      Author: bersch
 */
7
8 /*      miscellaneous includes */
9 #include "config.h"
10 #include "debug.h"
11 #include "error.h"
12
13 #include "irdom.h"
14 #include "iredges_t.h"
15 #include "irprintf.h"
16 #include "irgwalk.h"
17 #include "time.h"
18
19 /* libfirm/ir/be includes */
20 #include "bearch.h"
21 #include "beirg.h"
22 #include "besched.h"
23 #include "bemodule.h"
24 #include "bechordal_common.h"
25 #include "bechordal.h"
26 #include "bechordal_t.h"
27 #include "beinsn_t.h"
28 #include "benode.h"
29 #include "belive.h"
30 #include "belive_t.h"
31 #include "beutil.h"
32 #include "plist.h"
33 #include "pqueue.h"
34
35 /* pbqp includes */
36 #include "kaps.h"
37 #include "matrix.h"
38 #include "vector.h"
39 #include "vector_t.h"
40 #include "heuristical.h"
41 #include "pbqp_t.h"
42 #include "html_dumper.h"
43 #include "pbqp_node_t.h"
44 #include "pbqp_node.h"
45
46
47 typedef struct _be_pbqp_alloc_env_t {
48         pbqp                                            *pbqp_inst;             /**< PBQP instance for register allocation */
49         be_irg_t                        *birg;          /**< Back-end IRG session. */
50         ir_graph                        *irg;           /**< The graph under examination. */
51         const arch_register_class_t *cls;                       /**< Current processed register class */
52         be_lv_t                     *lv;
53         bitset_t                    *ignored_regs;
54         pbqp_matrix                                     *ife_matrix_dummy;
55         pbqp_matrix                                     *aff_matrix_dummy;
56         plist_t                                         *rpeo;
57         unsigned                                        *restr_nodes;
58         be_chordal_env_t                        *env;
59 } be_pbqp_alloc_env_t;
60
61
/* True for a Phi node whose mode is a data mode. */
#define is_Reg_Phi(node)        (is_Phi((node)) && mode_is_data(get_irn_mode((node))))
/* For a Proj of a Perm: the operand of the Perm at the Proj's own position. */
#define get_Perm_src(node)      (get_irn_n(get_Proj_pred((node)), get_Proj_proj((node))))
/* True for a Proj node whose predecessor is a backend Perm. */
#define is_Perm_Proj(node)      (is_Proj((node)) && be_is_Perm(get_Proj_pred((node))))
65
66 static inline int is_2addr_code(const arch_register_req_t *req)
67 {
68         return (req->type & arch_register_req_type_should_be_same) != 0;
69 }
70
71
#if KAPS_DUMP
/**
 * Opens a dump file for writing. The file name is built as
 * <prefix><unit>_<graph>_<class name><suffix>, where <unit> is the
 * compilation unit name with every '.' replaced by '_'.
 *
 * @param env     chordal environment providing unit name, graph and class
 * @param prefix  text put in front of the generated name
 * @param suffix  text appended to the generated name
 * @return the opened stream; the function panics instead of returning NULL
 */
static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
{
	const char *cup_name = env->birg->main_env->cup_name;
	size_t      len      = strlen(cup_name);
	char       *unit     = XMALLOCN(char, len + 1);
	char        path[1024];
	FILE       *stream;
	char       *p;

	/* work on a private copy so the dots can be replaced */
	strcpy(unit, cup_name);
	for (p = unit; *p != '\0'; ++p) {
		if (*p == '.')
			*p = '_';
	}

	ir_snprintf(path, sizeof(path), "%s%s_%F_%s%s", prefix, unit, env->irg, env->cls->name, suffix);
	xfree(unit);

	stream = fopen(path, "wt");
	if (stream == NULL) {
		panic("Couldn't open '%s' for writing.", path);
	}

	return stream;
}
#endif
97
98
99 static unsigned create_pbqp_node(be_pbqp_alloc_env_t *pbqp_alloc_env, ir_node *irn) {
100         const arch_register_class_t *cls = pbqp_alloc_env->cls;
101         pbqp     *pbqp_inst              = pbqp_alloc_env->pbqp_inst;
102         bitset_t *ignored_regs           = pbqp_alloc_env->ignored_regs;
103         unsigned  colors_n               = arch_register_class_n_regs(cls);
104         unsigned  cntConstrains          = 0;
105
106         /* create costs vector depending on register constrains */
107         struct vector *costs_vector = vector_alloc(pbqp_inst, colors_n);
108
109         /* set costs depending on register constrains */
110         unsigned idx;
111         for(idx = 0; idx < colors_n; idx++) {
112                 if(bitset_is_set(ignored_regs, idx) || !arch_reg_out_is_allocatable(irn, arch_register_for_index(cls, idx))) {
113                         vector_set(costs_vector, idx, INF_COSTS);
114                         cntConstrains++;
115                 }
116         }
117
118         /* add vector to pbqp node */
119         add_node_costs(pbqp_inst, get_irn_idx(irn), costs_vector);
120
121         /* return number of free selectable registers */
122         return (colors_n - cntConstrains);
123 }
124
125 static void build_graph_walker(ir_node *irn, void *env) {
126         be_pbqp_alloc_env_t         *pbqp_alloc_env = env;
127         pbqp                                            *pbqp_inst              = pbqp_alloc_env->pbqp_inst;
128         const arch_register_class_t *cls            = pbqp_alloc_env->cls;
129         const arch_register_req_t   *req            = arch_get_register_req_out(irn);
130         unsigned pos, max;
131
132         if (arch_irn_consider_in_reg_alloc(cls, irn))
133                 return;
134
135         if (is_Reg_Phi(irn)) { /* Phis */
136                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
137                         ir_node *arg = get_irn_n(irn, pos);
138                         //add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
139
140                         if (!arch_irn_consider_in_reg_alloc(cls, arg))
141                                 continue;
142
143                         /* no edges to itself */
144                         if(irn == arg) {
145                                 continue;
146                         }
147
148                         if(get_edge(pbqp_inst, get_irn_idx(irn), get_irn_idx(arg)) == NULL) {
149                                 /* copy matrix */
150                                 struct pbqp_matrix *matrix = pbqp_matrix_copy(pbqp_inst, pbqp_alloc_env->aff_matrix_dummy);
151                                 /* add costs matrix to affinity edge */
152                                 add_edge_costs(pbqp_inst, get_irn_idx(irn), get_irn_idx(arg) , matrix);
153                         }
154                 }
155         }
156         else if (is_Perm_Proj(irn)) { /* Perms */
157                 ir_node *arg = get_Perm_src(irn);
158                 //add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
159
160                 if (!arch_irn_consider_in_reg_alloc(cls, arg))
161                         return;
162
163                 if(get_edge(pbqp_inst, get_irn_idx(irn), get_irn_idx(arg)) == NULL) {
164                         /* copy matrix */
165                         struct pbqp_matrix *matrix = pbqp_matrix_copy(pbqp_inst, pbqp_alloc_env->aff_matrix_dummy);
166                         /* add costs matrix to affinity edge */
167                         add_edge_costs(pbqp_inst, get_irn_idx(irn), get_irn_idx(arg) , matrix);
168                 }
169         }
170         else { /* 2-address code */
171                 if (is_2addr_code(req)) {
172                         const unsigned other = req->other_same;
173                         int i;
174
175                         for (i = 0; 1U << i <= other; ++i) {
176                                 if (other & (1U << i)) {
177                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
178 //                                      if (!arch_irn_is_ignore(other)) {
179                                                 //add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
180                                                 if (!arch_irn_consider_in_reg_alloc(cls, other))
181                                                         continue;
182
183                                                 /* no edges to itself */
184                                                 if(irn == other) {
185                                                         continue;
186                                                 }
187
188                                                 if(get_edge(pbqp_inst, get_irn_idx(irn), get_irn_idx(other)) == NULL) {
189                                                         /* copy matrix */
190                                                         struct pbqp_matrix *matrix = pbqp_matrix_copy(pbqp_inst, pbqp_alloc_env->aff_matrix_dummy);
191                                                         /* add costs matrix to affinity edge */
192                                                         add_edge_costs(pbqp_inst, get_irn_idx(irn), get_irn_idx(other) , matrix);
193                                                 }
194 //                                      }
195                                 }
196                         }
197                 }
198         }
199 }
200
201 static void create_pbqp_coloring_inst(ir_node *block, void *data) {
202         be_pbqp_alloc_env_t         *pbqp_alloc_env     = data;
203         be_lv_t                     *lv                 = pbqp_alloc_env->lv;
204         const arch_register_class_t *cls                = pbqp_alloc_env->cls;
205         plist_t                                         *rpeo                           = pbqp_alloc_env->rpeo;
206         pbqp                                            *pbqp_inst                      = pbqp_alloc_env->pbqp_inst;
207         unsigned                                        *restr_nodes            = pbqp_alloc_env->restr_nodes;
208         pbqp_matrix                             *ife_matrix_dummy       = pbqp_alloc_env->ife_matrix_dummy;
209         pqueue_t                                        *queue                  = new_pqueue();
210         pqueue_t                                        *restr_nodes_queue      = new_pqueue();
211         plist_t                                         *temp_list              = plist_new();
212         ir_node                     *irn;
213         ir_nodeset_t                 live_nodes;
214
215         /* first, determine the pressure */
216         /* (this is only for compatibility with copymin optimization, it's not needed for pbqp coloring) */
217         pressure(block, pbqp_alloc_env->env);
218
219         /* calculate living nodes for the first step */
220         ir_nodeset_init(&live_nodes);
221         be_liveness_end_of_block(lv, cls, block, &live_nodes);
222
223         /* create pbqp nodes, interference edges and reverse perfect elimination order */
224         sched_foreach_reverse(block, irn) {
225                 ir_node *live, *if_live;
226                 ir_nodeset_iterator_t  iter, iter2;
227
228                 /* create nodes and interference edges */
229                 foreach_ir_nodeset(&live_nodes, live, iter) {
230                         /* create pbqp source node if it dosn't exist */
231                         if(get_node(pbqp_inst, get_irn_idx(live)) == NULL) {
232                                 restr_nodes[get_irn_idx(live)] = create_pbqp_node(pbqp_alloc_env, live);
233                         }
234
235                         iter2 = iter;
236                         for(if_live = ir_nodeset_iterator_next(&iter2); if_live != NULL; if_live = ir_nodeset_iterator_next(&iter2)) {
237                                 /* create pbqp target node if it dosn't exist */
238                                 if(get_node(pbqp_inst, get_irn_idx(if_live)) == NULL) {
239                                         restr_nodes[get_irn_idx(if_live)] = create_pbqp_node(pbqp_alloc_env, if_live);
240                                 }
241                                 else {
242                                         /* no edges to itself */
243                                         if(live == if_live)
244                                                 continue;
245                                         /* only one interference edge between two nodes */
246                                         if(get_edge(pbqp_inst, get_irn_idx(live), get_irn_idx(if_live)))
247                                                 continue;
248                                 }
249
250                                 /* do useful optimization to improve pbqp solving (we can do this because we know our matrix) */
251                                 if(restr_nodes[get_irn_idx(live)] == 1 && restr_nodes[get_irn_idx(if_live)] == 1) {
252                                         unsigned src_idx = vector_get_min_index(get_node(pbqp_inst, get_irn_idx(live))->costs);
253                                         unsigned trg_idx = vector_get_min_index(get_node(pbqp_inst, get_irn_idx(if_live))->costs);
254                                         assert(src_idx != trg_idx && "Interfering nodes could not have the same register!");
255                                         continue;
256                                 }
257                                 if(restr_nodes[get_irn_idx(live)] == 1 || restr_nodes[get_irn_idx(if_live)] == 1) {
258                                         if(restr_nodes[get_irn_idx(live)] == 1) {
259                                                 unsigned idx = vector_get_min_index(get_node(pbqp_inst, get_irn_idx(live))->costs);
260                                                 vector_set(get_node(pbqp_inst, get_irn_idx(if_live))->costs, idx, INF_COSTS);
261                                         }
262                                         else {
263                                                 unsigned idx = vector_get_min_index(get_node(pbqp_inst, get_irn_idx(if_live))->costs);
264                                                 vector_set(get_node(pbqp_inst, get_irn_idx(live))->costs, idx, INF_COSTS);
265                                         }
266                                         continue;
267                                 }
268
269                                 /* copy matrix */
270                                 struct pbqp_matrix *matrix = pbqp_matrix_copy(pbqp_inst, ife_matrix_dummy);
271                                 /* add costs matrix to interference edge */
272                                 add_edge_costs(pbqp_inst, get_irn_idx(live), get_irn_idx(if_live) , matrix);
273                         }
274                 }
275
276                 /* order nodes for perfect elimination order */
277                 if (get_irn_mode(irn) == mode_T) {
278                         plist_element_t *first = plist_first(temp_list);
279                         const ir_edge_t *edge;
280
281                         foreach_out_edge(irn, edge) {
282                                 ir_node *proj = get_edge_src_irn(edge);
283                                 if (!arch_irn_consider_in_reg_alloc(cls, proj))
284                                         continue;
285
286                                 // insert proj node into priority queue (descending by the number of interference edges)
287                                 if(restr_nodes[get_irn_idx(proj)] <= 4/*bitset_is_set(restr_nodes, get_irn_idx(proj))*/) {
288                                         pqueue_put(restr_nodes_queue, proj, pbqp_node_get_degree(get_node(pbqp_inst, get_irn_idx(proj))));
289                                 }
290                                 else {
291                                         pqueue_put(queue,proj, pbqp_node_get_degree(get_node(pbqp_inst, get_irn_idx(proj))));
292                                 }
293
294                         }
295
296                         /* first insert all restricted nodes */
297                         while(!pqueue_empty(restr_nodes_queue)) {
298                                 if(first == NULL) {
299                                         plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
300                                         first = plist_first(temp_list);
301                                 } else {
302                                         plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(restr_nodes_queue))));
303                                 }
304                         }
305
306                         /* insert proj nodes descending by their number of interference edges */
307                         while(!pqueue_empty(queue)) {
308                                 if(first == NULL) {
309                                         plist_insert_back(temp_list, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
310                                         first = plist_first(temp_list);
311                                 } else {
312                                         plist_insert_before(temp_list, first, get_node(pbqp_inst, get_irn_idx(pqueue_pop_front(queue))));
313                                 }
314                         }
315                 }
316                 else {
317                         if (arch_irn_consider_in_reg_alloc(cls, irn)) {
318                                 plist_insert_front(temp_list, get_node(pbqp_inst, get_irn_idx(irn)));
319                         }
320                 }
321
322                 /* get living nodes for next step */
323                 if (!is_Phi(irn)) {
324                         be_liveness_transfer(cls, irn, &live_nodes);
325                 }
326         }
327
328         /* insert nodes into reverse perfect elimination order */
329         plist_element_t *el;
330         foreach_plist(temp_list, el) {
331                 plist_insert_back(rpeo, el->data);
332         }
333
334         /* free reserved memory */
335         ir_nodeset_destroy(&live_nodes);
336         plist_free(temp_list);
337         del_pqueue(queue);
338         del_pqueue(restr_nodes_queue);
339 }
340
341 static void insert_perms(ir_node *block, void *data) {
342         /*
343          * Start silent in the start block.
344          * The silence remains until the first barrier is seen.
345          * Each other block is begun loud.
346          */
347         be_chordal_env_t *env    = data;
348         ir_node          *irn;
349         int               silent = block == get_irg_start_block(get_irn_irg(block));
350
351         /*
352          * If the block is the start block search the barrier and
353          * start handling constraints from there.
354          */
355         for (irn = sched_first(block); !sched_is_end(irn);) {
356                 int silent_old = silent;        /* store old silent value */
357                 if (be_is_Barrier(irn))
358                         silent = !silent;               /* toggle silent flag */
359
360                 be_insn_t *insn         = chordal_scan_insn(env, irn);
361                 irn                                     = insn->next_insn;
362
363                 if (silent_old)
364                         continue;
365
366                 if (!insn->has_constraints)
367                         continue;
368
369                 pre_process_constraints(env, &insn);
370         }
371 }
372
373
374 void be_pbqp_coloring(be_chordal_env_t *env) {
375         ir_graph                      *irg  = env->irg;
376         be_irg_t                      *birg = env->birg;
377         const arch_register_class_t   *cls  = env->cls;
378         unsigned colors_n                                   = arch_register_class_n_regs(cls);
379         be_pbqp_alloc_env_t pbqp_alloc_env;
380         unsigned idx, row, col;
381         be_lv_t *lv;
382
383 //      ir_timer_t *t_ra_pbqp_alloc_create    = ir_timer_register("be_pbqp_alloc_create", "pbqp alloc create");
384 //      ir_timer_t *t_ra_pbqp_alloc_solve     = ir_timer_register("be_pbqp_alloc_solve", "pbqp alloc solve");
385 //      ir_timer_t *t_ra_pbqp_alloc_create_aff  = ir_timer_register("be_pbqp_alloc_create_aff", "pbqp alloc create aff");
386
387         lv = be_assure_liveness(birg);
388         be_liveness_assure_sets(lv);
389         be_liveness_assure_chk(lv);
390
391 //      printf("#### ----- === Allocating registers of %s (%s) ===\n", cls->name, get_entity_name(get_irg_entity(irg)));
392
393         /* insert perms */
394         assure_doms(irg);
395         dom_tree_walk_irg(irg, insert_perms, NULL, env);
396
397         /* dump graph after inserting perms */
398         if (env->opts->dump_flags & BE_CH_DUMP_CONSTR) {
399                 char buf[256];
400                 snprintf(buf, sizeof(buf), "-%s-constr", cls->name);
401                 be_dump(irg, buf, dump_ir_block_graph_sched);
402         }
403
404         /* initialize pbqp allocation data structure */
405         pbqp_alloc_env.pbqp_inst    = alloc_pbqp(get_irg_last_idx(irg));                /* initialize pbqp instance */
406         pbqp_alloc_env.birg         = birg;
407         pbqp_alloc_env.cls          = cls;
408         pbqp_alloc_env.irg          = irg;
409         pbqp_alloc_env.lv           = lv;
410         pbqp_alloc_env.ignored_regs = bitset_malloc(colors_n);
411         pbqp_alloc_env.rpeo                     = plist_new();
412         pbqp_alloc_env.restr_nodes  = XMALLOCNZ(unsigned, get_irg_last_idx(irg));
413         pbqp_alloc_env.env                      = env;
414         be_put_ignore_regs(birg, cls, pbqp_alloc_env.ignored_regs);                             /* get ignored registers */
415
416         /* create costs matrix for interference edges */
417         struct pbqp_matrix *ife_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
418         /* set costs */
419         for(row = 0, col=0; row < colors_n; row++, col++)
420                 pbqp_matrix_set(ife_matrix, row, col, INF_COSTS);
421
422         pbqp_alloc_env.ife_matrix_dummy = ife_matrix;
423
424         /* create costs matrix for affinity edges */
425         struct pbqp_matrix *afe_matrix = pbqp_matrix_alloc(pbqp_alloc_env.pbqp_inst, colors_n, colors_n);
426         /* set costs */
427         for(row = 0; row < colors_n; row++) {
428                 for(col = 0; col < colors_n; col++) {
429                         if(row != col)
430                                 pbqp_matrix_set(afe_matrix, row, col, 2);
431                 }
432         }
433         pbqp_alloc_env.aff_matrix_dummy = afe_matrix;
434
435
436         /* create pbqp instance */
437 //      ir_timer_reset_and_start(t_ra_pbqp_alloc_create);
438         assure_doms(irg);
439         dom_tree_walk_irg(irg, create_pbqp_coloring_inst , NULL, &pbqp_alloc_env);
440 //      ir_timer_stop(t_ra_pbqp_alloc_create);
441
442         /* set up affinity edges */
443 //      ir_timer_reset_and_start(t_ra_pbqp_alloc_create_aff);
444         irg_walk_graph(irg, build_graph_walker, NULL, &pbqp_alloc_env);
445 //      ir_timer_stop(t_ra_pbqp_alloc_create_aff);
446
447 #if KAPS_DUMP
448         // dump graph before solving pbqp
449         FILE *file_before = my_open(env, "", "-pbqp_coloring.html");
450         set_dumpfile(pbqp_alloc_env.pbqp_inst, file_before);
451 #endif
452
453         /* solve pbqp instance */
454 //      ir_timer_reset_and_start(t_ra_pbqp_alloc_solve);
455         solve_pbqp_heuristical_co(pbqp_alloc_env.pbqp_inst,pbqp_alloc_env.rpeo);
456 //      ir_timer_stop(t_ra_pbqp_alloc_solve);
457         num solution = get_solution(pbqp_alloc_env.pbqp_inst);
458         assert(solution != INF_COSTS && "No PBQP solution found");
459
460         plist_element_t *element;
461         foreach_plist(pbqp_alloc_env.rpeo, element) {
462                 pbqp_node *node                    = element->data;
463                 idx                                    = node->index;
464                 ir_node *irn               = get_idx_irn(irg, idx);
465                 num color                  = get_node_solution(pbqp_alloc_env.pbqp_inst, idx);
466                 const arch_register_t *reg = arch_register_for_index(cls, color);
467
468                 arch_set_irn_register(irn, reg);
469         }
470
471 //      printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_create), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create) / 1000.0);
472 //      printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_solve), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_solve) / 1000.0);
473 //      printf("%-20s: %8.3lf msec\n" , ir_timer_get_description(t_ra_pbqp_alloc_create_aff), (double)ir_timer_elapsed_usec(t_ra_pbqp_alloc_create_aff) / 1000.0);
474
475
476         /* free reserved memory */
477 #if KAPS_DUMP
478         fclose(file_before);
479 #endif
480         bitset_free(pbqp_alloc_env.ignored_regs);
481         free_pbqp(pbqp_alloc_env.pbqp_inst);
482         plist_free(pbqp_alloc_env.rpeo);
483         xfree(pbqp_alloc_env.restr_nodes);
484 }
485
486
487 /**
488  * Initializes this module.
489  */
490 void be_init_pbqp_coloring(void) {
491
492         static be_ra_chordal_coloring_t coloring = {
493                 be_pbqp_coloring
494         };
495
496         be_register_chordal_coloring("pbqp", &coloring);
497 }
498
499 BE_REGISTER_MODULE_CONSTRUCTOR(be_pbqp_alloc);