only compile pbqp stuff if pbqp defines are enabled
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode_t.h"
52 #include "beutil.h"
53 #include "beifg_t.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static unsigned   algo        = CO_ALGO_HEUR4;
81 static int        improve     = 1;
82
83 static const lc_opt_enum_mask_items_t dump_items[] = {
84         { "before",  DUMP_BEFORE },
85         { "after",   DUMP_AFTER  },
86         { "appel",   DUMP_APPEL  },
87         { "all",     DUMP_ALL    },
88         { NULL,      0 }
89 };
90
91 static const lc_opt_enum_mask_items_t style_items[] = {
92         { "color",   CO_IFG_DUMP_COLORS },
93         { "labels",  CO_IFG_DUMP_LABELS },
94         { "constr",  CO_IFG_DUMP_CONSTR },
95         { "shape",   CO_IFG_DUMP_SHAPE  },
96         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
97         { NULL,      0 }
98 };
99
100 static const lc_opt_enum_mask_items_t algo_items[] = {
101         { "none",   CO_ALGO_NONE  },
102         { "heur",   CO_ALGO_HEUR  },
103         { "heur2",  CO_ALGO_HEUR2 },
104         { "heur3",  CO_ALGO_HEUR3 },
105         { "heur4",  CO_ALGO_HEUR4 },
106         { "ilp",    CO_ALGO_ILP   },
107         { "pbqp",   CO_ALGO_PBQP  },
108         { NULL,     0 }
109 };
110
111 typedef int (*opt_funcptr)(void);
112
113 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
114         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
115         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
116         { "one",    (opt_funcptr) co_get_costs_all_one },
117         { NULL,     NULL }
118 };
119
120 static lc_opt_enum_mask_var_t dump_var = {
121         &dump_flags, dump_items
122 };
123
124 static lc_opt_enum_mask_var_t style_var = {
125         &style_flags, style_items
126 };
127
128 static lc_opt_enum_mask_var_t algo_var = {
129         &algo, algo_items
130 };
131
132 static lc_opt_enum_func_ptr_var_t cost_func_var = {
133         (opt_funcptr*) &cost_func, cost_func_items
134 };
135
136 static const lc_opt_table_entry_t options[] = {
137         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
138         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
139         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
140         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
141         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
142         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
143         LC_OPT_LAST
144 };
145
146 /* Insert additional options registration functions here. */
147 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
148 extern void be_co2_register_options(lc_opt_entry_t *grp);
149 extern void be_co3_register_options(lc_opt_entry_t *grp);
150
151 void be_init_copycoal(void)
152 {
153         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
154         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
155         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
156         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
157
158         lc_opt_add_table(co_grp, options);
159 }
160
161 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
162
163 #undef QUICK_AND_DIRTY_HACK
164
165 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
166 {
167         if (env->ifg)
168                 return be_ifg_connected(env->ifg, a, b);
169         else
170                 return be_values_interfere(env->birg->lv, a, b);
171 }
172
173
174 /******************************************************************************
175     _____                           _
176    / ____|                         | |
177   | |  __  ___ _ __   ___ _ __ __ _| |
178   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
179   | |__| |  __/ | | |  __/ | | (_| | |
180    \_____|\___|_| |_|\___|_|  \__,_|_|
181
182  ******************************************************************************/
183
184 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
185
186
187 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
188 {
189         const char *s1, *s2, *s3;
190         int len;
191         copy_opt_t *co;
192
193         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
194
195         co = XMALLOCZ(copy_opt_t);
196         co->cenv      = chordal_env;
197         co->irg       = chordal_env->irg;
198         co->cls       = chordal_env->cls;
199         co->get_costs = get_costs;
200
201         s1 = get_irp_name();
202         s2 = get_entity_name(get_irg_entity(co->irg));
203         s3 = chordal_env->cls->name;
204         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
205         co->name = XMALLOCN(char, len);
206         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
207
208         return co;
209 }
210
211 void free_copy_opt(copy_opt_t *co) {
212         xfree(co->name);
213         free(co);
214 }
215
216 /**
217  * Checks if a node is optimizable, viz. has something to do with coalescing
218  * @param irn  The irn to check
219  */
220 static int co_is_optimizable_root(ir_node *irn)
221 {
222         const arch_register_req_t *req;
223         const arch_register_t     *reg;
224
225         if (arch_irn_is_ignore(irn))
226                 return 0;
227
228         reg = arch_get_irn_register(irn);
229         if (arch_register_type_is(reg, ignore))
230                 return 0;
231
232         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
233                 return 1;
234
235         req = arch_get_register_req_out(irn);
236         if (is_2addr_code(req))
237                 return 1;
238
239         return 0;
240 }
241
242 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
243         int cost = 0;
244         ir_loop *loop;
245         ir_node *root_block = get_nodes_block(root);
246         (void) co;
247         (void) arg;
248
249         if (is_Phi(root)) {
250                 /* for phis the copies are placed in the corresponding pred-block */
251                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
252         } else {
253                 /* a perm places the copy in the same block as it resides */
254                 loop = get_irn_loop(root_block);
255         }
256         if (loop) {
257                 int d = get_loop_depth(loop);
258                 cost = d*d;
259         }
260         return 1+cost;
261 }
262
263 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
264         int res;
265         ir_node *root_bl = get_nodes_block(root);
266         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
267         (void) arg;
268         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
269
270         /* don't allow values smaller than one. */
271         return res < 1 ? 1 : res;
272 }
273
274
275 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
276         (void) co;
277         (void) root;
278         (void) arg;
279         (void) pos;
280         return 1;
281 }
282
283 /******************************************************************************
284    ____        _   _    _       _ _          _____ _
285   / __ \      | | | |  | |     (_) |        / ____| |
286  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
287  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
288  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
289   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
290         | |                                                            __/ |
291         |_|                                                           |___/
292  ******************************************************************************/
293
294 /**
295  * Determines a maximum weighted independent set with respect to
296  * the interference and conflict edges of all nodes in a qnode.
297  */
298 static int ou_max_ind_set_costs(unit_t *ou) {
299         be_chordal_env_t *chordal_env = ou->co->cenv;
300         ir_node **safe, **unsafe;
301         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
302         bitset_t *curr;
303         bitset_pos_t pos;
304         int max, curr_weight, best_weight = 0;
305
306         /* assign the nodes into two groups.
307          * safe: node has no interference, hence it is in every max stable set.
308          * unsafe: node has an interference
309          */
310         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
311         safe_costs   = 0;
312         safe_count   = 0;
313         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
314         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
315         unsafe_count = 0;
316         for(i=1; i<ou->node_count; ++i) {
317                 int is_safe = 1;
318                 for(o=1; o<ou->node_count; ++o) {
319                         if (i==o)
320                                 continue;
321                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
322                                 unsafe_costs[unsafe_count] = ou->costs[i];
323                                 unsafe[unsafe_count] = ou->nodes[i];
324                                 ++unsafe_count;
325                                 is_safe = 0;
326                                 break;
327                         }
328                 }
329                 if (is_safe) {
330                         safe_costs += ou->costs[i];
331                         safe[safe_count++] = ou->nodes[i];
332                 }
333         }
334
335
336         /* now compute the best set out of the unsafe nodes*/
337         if (unsafe_count > MIS_HEUR_TRIGGER) {
338                 bitset_t *best = bitset_alloca(unsafe_count);
339                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
340                 for (i=0; i<unsafe_count; ++i) {
341                         bitset_set(best, i);
342                         /* check if it is a stable set */
343                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
344                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
345                                         bitset_clear(best, i); /* clear the bit and try next one */
346                                         break;
347                                 }
348                 }
349                 /* compute the weight */
350                 bitset_foreach(best, pos)
351                         best_weight += unsafe_costs[pos];
352         } else {
353                 /* Exact Algorithm: Brute force */
354                 curr = bitset_alloca(unsafe_count);
355                 bitset_set_all(curr);
356                 while ((max = bitset_popcnt(curr)) != 0) {
357                         /* check if curr is a stable set */
358                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
359                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
360                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
361                                                         goto no_stable_set;
362
363                         /* if we arrive here, we have a stable set */
364                         /* compute the weigth of the stable set*/
365                         curr_weight = 0;
366                         bitset_foreach(curr, pos)
367                                 curr_weight += unsafe_costs[pos];
368
369                         /* any better ? */
370                         if (curr_weight > best_weight) {
371                                 best_weight = curr_weight;
372                         }
373
374         no_stable_set:
375                         bitset_minus1(curr);
376                 }
377         }
378
379         return safe_costs+best_weight;
380 }
381
382 static void co_collect_units(ir_node *irn, void *env)
383 {
384         const arch_register_req_t *req = arch_get_register_req_out(irn);
385         copy_opt_t                *co  = env;
386         unit_t *unit;
387
388         if (req->cls != co->cls)
389                 return;
390         if (!co_is_optimizable_root(irn))
391                 return;
392
393         /* Init a new unit */
394         unit = XMALLOCZ(unit_t);
395         unit->co = co;
396         unit->node_count = 1;
397         INIT_LIST_HEAD(&unit->queue);
398
399         /* Phi with some/all of its arguments */
400         if (is_Reg_Phi(irn)) {
401                 int i, arity;
402
403                 /* init */
404                 arity = get_irn_arity(irn);
405                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
406                 unit->costs = XMALLOCN(int,      arity + 1);
407                 unit->nodes[0] = irn;
408
409                 /* fill */
410                 for (i=0; i<arity; ++i) {
411                         int o, arg_pos;
412                         ir_node *arg = get_irn_n(irn, i);
413
414                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
415                         if (arg == irn)
416                                 continue;
417                         if (nodes_interfere(co->cenv, irn, arg)) {
418                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
419                                 continue;
420                         }
421
422                         /* Else insert the argument of the phi to the members of this ou */
423                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
424
425                         if (arch_irn_is_ignore(arg))
426                                 continue;
427
428                         /* Check if arg has occurred at a prior position in the arg/list */
429                         arg_pos = 0;
430                         for (o=1; o<unit->node_count; ++o) {
431                                 if (unit->nodes[o] == arg) {
432                                         arg_pos = o;
433                                         break;
434                                 }
435                         }
436
437                         if (!arg_pos) { /* a new argument */
438                                 /* insert node, set costs */
439                                 unit->nodes[unit->node_count] = arg;
440                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
441                                 unit->node_count++;
442                         } else { /* arg has occurred before in same phi */
443                                 /* increase costs for existing arg */
444                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
445                         }
446                 }
447                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
448                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
449         } else if (is_Perm_Proj(irn)) {
450                 /* Proj of a perm with corresponding arg */
451                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
452                 unit->nodes = XMALLOCN(ir_node*, 2);
453                 unit->costs = XMALLOCN(int,      2);
454                 unit->node_count = 2;
455                 unit->nodes[0] = irn;
456                 unit->nodes[1] = get_Perm_src(irn);
457                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
458         } else {
459                 /* Src == Tgt of a 2-addr-code instruction */
460                 if (is_2addr_code(req)) {
461                         const unsigned other = req->other_same;
462                         int            count = 0;
463                         int            i;
464
465                         for (i = 0; (1U << i) <= other; ++i) {
466                                 if (other & (1U << i)) {
467                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
468                                         if (arch_irn_is_ignore(o))
469                                                 continue;
470                                         if (nodes_interfere(co->cenv, irn, o))
471                                                 continue;
472                                         ++count;
473                                 }
474                         }
475
476                         if (count != 0) {
477                                 int k = 0;
478                                 ++count;
479                                 unit->nodes = XMALLOCN(ir_node*, count);
480                                 unit->costs = XMALLOCN(int,      count);
481                                 unit->node_count = count;
482                                 unit->nodes[k++] = irn;
483
484                                 for (i = 0; 1U << i <= other; ++i) {
485                                         if (other & (1U << i)) {
486                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
487                                                 if (!arch_irn_is_ignore(o) &&
488                                                                 !nodes_interfere(co->cenv, irn, o)) {
489                                                         unit->nodes[k] = o;
490                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
491                                                         ++k;
492                                                 }
493                                         }
494                                 }
495                         }
496                 } else {
497                         assert(0 && "This is not an optimizable node!");
498                 }
499         }
500
501         /* Insert the new unit at a position according to its costs */
502         if (unit->node_count > 1) {
503                 int i;
504                 struct list_head *tmp;
505
506                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
507                 for(i=1; i<unit->node_count; ++i) {
508                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
509                         unit->all_nodes_costs += unit->costs[i];
510                 }
511
512                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
513                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
514                 /* Insert the new ou according to its sort_key */
515                 tmp = &co->units;
516                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
517                         tmp = tmp->next;
518                 list_add(&unit->units, tmp);
519         } else {
520                 free(unit);
521         }
522 }
523
524 #ifdef QUICK_AND_DIRTY_HACK
525
526 static int compare_ous(const void *k1, const void *k2) {
527         const unit_t *u1 = *((const unit_t **) k1);
528         const unit_t *u2 = *((const unit_t **) k2);
529         int i, o, u1_has_constr, u2_has_constr;
530         arch_register_req_t req;
531
532         /* Units with constraints come first */
533         u1_has_constr = 0;
534         for (i=0; i<u1->node_count; ++i) {
535                 arch_get_register_req_out(&req, u1->nodes[i]);
536                 if (arch_register_req_is(&req, limited)) {
537                         u1_has_constr = 1;
538                         break;
539                 }
540         }
541
542         u2_has_constr = 0;
543         for (i=0; i<u2->node_count; ++i) {
544                 arch_get_register_req_out(&req, u2->nodes[i]);
545                 if (arch_register_req_is(&req, limited)) {
546                         u2_has_constr = 1;
547                         break;
548                 }
549         }
550
551         if (u1_has_constr != u2_has_constr)
552                 return u2_has_constr - u1_has_constr;
553
554         /* Now check, whether the two units are connected */
555 #if 0
556         for (i=0; i<u1->node_count; ++i)
557                 for (o=0; o<u2->node_count; ++o)
558                         if (u1->nodes[i] == u2->nodes[o])
559                                 return 0;
560 #endif
561
562         /* After all, the sort key decides. Greater keys come first. */
563         return u2->sort_key - u1->sort_key;
564
565 }
566
567 /**
568  * Sort the ou's according to constraints and their sort_key
569  */
570 static void co_sort_units(copy_opt_t *co) {
571         int i, count = 0, costs;
572         unit_t *ou, **ous;
573
574         /* get the number of ous, remove them form the list and fill the array */
575         list_for_each_entry(unit_t, ou, &co->units, units)
576                 count++;
577         ous = ALLOCAN(unit_t, count);
578
579         costs = co_get_max_copy_costs(co);
580
581         i = 0;
582         list_for_each_entry(unit_t, ou, &co->units, units)
583                 ous[i++] = ou;
584
585         INIT_LIST_HEAD(&co->units);
586
587         assert(count == i && list_empty(&co->units));
588
589         for (i=0; i<count; ++i)
590                 ir_printf("%+F\n", ous[i]->nodes[0]);
591
592         qsort(ous, count, sizeof(*ous), compare_ous);
593
594         ir_printf("\n\n");
595         for (i=0; i<count; ++i)
596                 ir_printf("%+F\n", ous[i]->nodes[0]);
597
598         /* reinsert into list in correct order */
599         for (i=0; i<count; ++i)
600                 list_add_tail(&ous[i]->units, &co->units);
601
602         assert(costs == co_get_max_copy_costs(co));
603 }
604 #endif
605
606 void co_build_ou_structure(copy_opt_t *co) {
607         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
608         INIT_LIST_HEAD(&co->units);
609         irg_walk_graph(co->irg, co_collect_units, NULL, co);
610 #ifdef QUICK_AND_DIRTY_HACK
611         co_sort_units(co);
612 #endif
613 }
614
615 void co_free_ou_structure(copy_opt_t *co) {
616         unit_t *curr, *tmp;
617         ASSERT_OU_AVAIL(co);
618         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
619                 xfree(curr->nodes);
620                 xfree(curr->costs);
621                 xfree(curr);
622         }
623         co->units.next = NULL;
624 }
625
626 /* co_solve_heuristic() is implemented in becopyheur.c */
627
628 int co_get_max_copy_costs(const copy_opt_t *co) {
629         int i, res = 0;
630         unit_t *curr;
631
632         ASSERT_OU_AVAIL(co);
633
634         list_for_each_entry(unit_t, curr, &co->units, units) {
635                 res += curr->inevitable_costs;
636                 for (i=1; i<curr->node_count; ++i)
637                         res += curr->costs[i];
638         }
639         return res;
640 }
641
642 int co_get_inevit_copy_costs(const copy_opt_t *co) {
643         int res = 0;
644         unit_t *curr;
645
646         ASSERT_OU_AVAIL(co);
647
648         list_for_each_entry(unit_t, curr, &co->units, units)
649                 res += curr->inevitable_costs;
650         return res;
651 }
652
653 int co_get_copy_costs(const copy_opt_t *co) {
654         int i, res = 0;
655         unit_t *curr;
656
657         ASSERT_OU_AVAIL(co);
658
659         list_for_each_entry(unit_t, curr, &co->units, units) {
660                 int root_col = get_irn_col(curr->nodes[0]);
661                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
662                 res += curr->inevitable_costs;
663                 for (i=1; i<curr->node_count; ++i) {
664                         int arg_col = get_irn_col(curr->nodes[i]);
665                         if (root_col != arg_col) {
666                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
667                                 res += curr->costs[i];
668                         }
669                 }
670         }
671         return res;
672 }
673
674 int co_get_lower_bound(const copy_opt_t *co) {
675         int res = 0;
676         unit_t *curr;
677
678         ASSERT_OU_AVAIL(co);
679
680         list_for_each_entry(unit_t, curr, &co->units, units)
681                 res += curr->inevitable_costs + curr->min_nodes_costs;
682         return res;
683 }
684
685 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
686 {
687         bitset_t *seen = bitset_irg_malloc(co->irg);
688         affinity_node_t *an;
689
690         memset(stat, 0, sizeof(stat[0]));
691
692         /* count affinity edges. */
693         co_gs_foreach_aff_node(co, an) {
694                 neighb_t *neigh;
695                 stat->aff_nodes += 1;
696                 bitset_add_irn(seen, an->irn);
697                 co_gs_foreach_neighb(an, neigh) {
698                         if(!bitset_contains_irn(seen, neigh->irn)) {
699                                 stat->aff_edges += 1;
700                                 stat->max_costs += neigh->costs;
701
702                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
703                                         stat->costs += neigh->costs;
704                                         stat->unsatisfied_edges += 1;
705                                 }
706
707                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
708                                         stat->aff_int += 1;
709                                         stat->inevit_costs += neigh->costs;
710                                 }
711
712                         }
713                 }
714         }
715
716         bitset_free(seen);
717 }
718
719 /******************************************************************************
720    _____                 _        _____ _
721   / ____|               | |      / ____| |
722  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
723  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
724  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
725   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
726                   | |                                       __/ |
727                   |_|                                      |___/
728  ******************************************************************************/
729
730 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
731         const affinity_node_t *n1 = k1;
732         const affinity_node_t *n2 = k2;
733         (void) size;
734
735         return (n1->irn != n2->irn);
736 }
737
738 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
739         affinity_node_t new_node, *node;
740         neighb_t        *nbr;
741         int             allocnew = 1;
742
743         new_node.irn        = n1;
744         new_node.degree     = 0;
745         new_node.neighbours = NULL;
746         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
747
748         for (nbr = node->neighbours; nbr; nbr = nbr->next)
749                 if (nbr->irn == n2) {
750                         allocnew = 0;
751                         break;
752                 }
753
754         /* if we did not find n2 in n1's neighbourhood insert it */
755         if (allocnew) {
756                 nbr        = OALLOC(&co->obst, neighb_t);
757                 nbr->irn   = n2;
758                 nbr->costs = 0;
759                 nbr->next  = node->neighbours;
760
761                 node->neighbours = nbr;
762                 node->degree++;
763         }
764
765         /* now nbr points to n1's neighbour-entry of n2 */
766         nbr->costs += costs;
767 }
768
769 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
770         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
771                 add_edge(co, n1, n2, costs);
772                 add_edge(co, n2, n1, costs);
773         }
774 }
775
776 static void build_graph_walker(ir_node *irn, void *env) {
777         const arch_register_req_t *req = arch_get_register_req_out(irn);
778         copy_opt_t                *co  = env;
779         int pos, max;
780         const arch_register_t *reg;
781
782         if (req->cls != co->cls || arch_irn_is_ignore(irn))
783                 return;
784
785         reg = arch_get_irn_register(irn);
786         if (arch_register_type_is(reg, ignore))
787                 return;
788
789         if (is_Reg_Phi(irn)) { /* Phis */
790                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
791                         ir_node *arg = get_irn_n(irn, pos);
792                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
793                 }
794         } else if (is_Perm_Proj(irn)) { /* Perms */
795                 ir_node *arg = get_Perm_src(irn);
796                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
797         } else { /* 2-address code */
798                 if (is_2addr_code(req)) {
799                         const unsigned other = req->other_same;
800                         int i;
801
802                         for (i = 0; 1U << i <= other; ++i) {
803                                 if (other & (1U << i)) {
804                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
805                                         if (!arch_irn_is_ignore(other))
806                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
807                                 }
808                         }
809                 }
810         }
811 }
812
813 void co_build_graph_structure(copy_opt_t *co) {
814         obstack_init(&co->obst);
815         co->nodes = new_set(compare_affinity_node_t, 32);
816
817         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
818 }
819
820 void co_free_graph_structure(copy_opt_t *co) {
821         ASSERT_GS_AVAIL(co);
822
823         del_set(co->nodes);
824         obstack_free(&co->obst, NULL);
825         co->nodes = NULL;
826 }
827
828 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
829
830 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
831         affinity_node_t new_node, *n;
832
833         ASSERT_GS_AVAIL(co);
834
835         new_node.irn = irn;
836         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
837         if (n) {
838                 return (n->degree > 0);
839         } else
840                 return 0;
841 }
842
843 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
844 {
845         ir_node *nodes[]  = { a, b };
846         bitset_t *constr[] = { NULL, NULL };
847         int j;
848
849         constr[0] = bitset_alloca(co->cls->n_regs);
850         constr[1] = bitset_alloca(co->cls->n_regs);
851
852         for (j = 0; j < 2; ++j) {
853                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
854                 if(arch_register_req_is(req, limited))
855                         rbitset_copy_to_bitset(req->limited, constr[j]);
856                 else
857                         bitset_set_all(constr[j]);
858
859         }
860
861         return !bitset_intersect(constr[0], constr[1]);
862 }
863
864 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
865 {
866         be_ifg_t *ifg       = co->cenv->ifg;
867         int      *color_map = ALLOCAN(int, co->cls->n_regs);
868         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
869
870         ir_node *irn;
871         void *it, *nit;
872         int n, n_regs;
873         unsigned i;
874
875         n_regs = 0;
876         for(i = 0; i < co->cls->n_regs; ++i) {
877                 const arch_register_t *reg = &co->cls->regs[i];
878                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
879         }
880
881         /*
882          * n contains the first node number.
883          * the values below n are the pre-colored register nodes
884          */
885
886         it  = be_ifg_nodes_iter_alloca(ifg);
887         nit = be_ifg_neighbours_iter_alloca(ifg);
888
889         n = n_regs;
890         be_ifg_foreach_node(ifg, it, irn) {
891                 if (arch_irn_is_ignore(irn))
892                         continue;
893                 node_map[get_irn_idx(irn)] = n++;
894         }
895
896         fprintf(f, "%d %d\n", n, n_regs);
897
898         be_ifg_foreach_node(ifg, it, irn) {
899                 if (!arch_irn_is_ignore(irn)) {
900                         int idx                        = node_map[get_irn_idx(irn)];
901                         affinity_node_t           *a   = get_affinity_info(co, irn);
902                         const arch_register_req_t *req = arch_get_register_req_out(irn);
903                         ir_node                   *adj;
904
905                         if(arch_register_req_is(req, limited)) {
906                                 for(i = 0; i < co->cls->n_regs; ++i) {
907                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
908                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
909                                 }
910                         }
911
912                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
913                                 if (!arch_irn_is_ignore(adj) &&
914                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
915                                         int adj_idx = node_map[get_irn_idx(adj)];
916                                         if(idx < adj_idx)
917                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
918                                 }
919                         }
920
921                         if(a) {
922                                 neighb_t *n;
923
924                                 co_gs_foreach_neighb(a, n) {
925                                         if (!arch_irn_is_ignore(n->irn)) {
926                                                 int n_idx = node_map[get_irn_idx(n->irn)];
927                                                 if(idx < n_idx)
928                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
929                                         }
930                                 }
931                         }
932                 }
933         }
934
935         xfree(node_map);
936 }
937
938 /*
939          ___ _____ ____   ____   ___ _____   ____                        _
940         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
941          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
942          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
943         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
944                                                                   |_|            |___/
945 */
946
947 static const char *get_dot_color_name(size_t col)
948 {
949         static const char *names[] = {
950                 "blue",
951                 "red",
952                 "green",
953                 "yellow",
954                 "cyan",
955                 "magenta",
956                 "orange",
957                 "chocolate",
958                 "beige",
959                 "navy",
960                 "darkgreen",
961                 "darkred",
962                 "lightPink",
963                 "chartreuse",
964                 "lightskyblue",
965                 "linen",
966                 "pink",
967                 "lightslateblue",
968                 "mintcream",
969                 "red",
970                 "darkolivegreen",
971                 "mediumblue",
972                 "mistyrose",
973                 "salmon",
974                 "darkseagreen",
975                 "mediumslateblue"
976                 "moccasin",
977                 "tomato",
978                 "forestgreen",
979                 "darkturquoise",
980                 "palevioletred"
981         };
982
983         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
984 }
985
986 typedef struct _co_ifg_dump_t {
987         const copy_opt_t *co;
988         unsigned flags;
989 } co_ifg_dump_t;
990
991 static void ifg_dump_graph_attr(FILE *f, void *self)
992 {
993         (void) self;
994         fprintf(f, "overlap=scale");
995 }
996
997 static int ifg_is_dump_node(void *self, ir_node *irn)
998 {
999         (void)self;
1000         return !arch_irn_is_ignore(irn);
1001 }
1002
1003 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1004 {
1005         co_ifg_dump_t             *env     = self;
1006         const arch_register_t     *reg     = arch_get_irn_register(irn);
1007         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1008         int                        limited = arch_register_req_is(req, limited);
1009
1010         if(env->flags & CO_IFG_DUMP_LABELS) {
1011                 ir_fprintf(f, "label=\"%+F", irn);
1012
1013                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1014                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1015                         rbitset_copy_to_bitset(req->limited, bs);
1016                         ir_fprintf(f, "\\n%B", bs);
1017                 }
1018                 ir_fprintf(f, "\" ");
1019         } else {
1020                 fprintf(f, "label=\"\" shape=point " );
1021         }
1022
1023         if(env->flags & CO_IFG_DUMP_SHAPE)
1024                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1025
1026         if(env->flags & CO_IFG_DUMP_COLORS)
1027                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1028 }
1029
1030 static void ifg_dump_at_end(FILE *file, void *self)
1031 {
1032         co_ifg_dump_t *env = self;
1033         affinity_node_t *a;
1034
1035         co_gs_foreach_aff_node(env->co, a) {
1036                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1037                 unsigned aidx = get_irn_idx(a->irn);
1038                 neighb_t *n;
1039
1040                 co_gs_foreach_neighb(a, n) {
1041                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1042                         unsigned nidx = get_irn_idx(n->irn);
1043
1044                         if(aidx < nidx) {
1045                                 const char *color = nr == ar ? "blue" : "red";
1046                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1047                                 if(env->flags & CO_IFG_DUMP_LABELS)
1048                                         fprintf(file, "label=\"%d\" ", n->costs);
1049                                 if(env->flags & CO_IFG_DUMP_COLORS)
1050                                         fprintf(file, "color=%s ", color);
1051                                 else
1052                                         fprintf(file, "style=dotted");
1053                                 fprintf(file, "];\n");
1054                         }
1055                 }
1056         }
1057 }
1058
1059
1060 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1061         ifg_is_dump_node,
1062         ifg_dump_graph_attr,
1063         ifg_dump_node_attr,
1064         NULL,
1065         NULL,
1066         ifg_dump_at_end
1067 };
1068
1069
1070
1071 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1072 {
1073         co_ifg_dump_t cod;
1074
1075         cod.co    = co;
1076         cod.flags = flags;
1077         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1078 }
1079
1080
1081 void co_solve_park_moon(copy_opt_t *opt)
1082 {
1083         (void) opt;
1084 }
1085
1086 static int void_algo(copy_opt_t *co)
1087 {
1088         (void) co;
1089         return 0;
1090 }
1091
1092 /*
1093                 _    _                  _ _   _
1094            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1095           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1096          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1097         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1098                            |___/
1099 */
1100
1101 typedef struct {
1102         co_algo_t  *algo;
1103         const char *name;
1104         int        can_improve_existing;
1105 } co_algo_info_t;
1106
1107 static co_algo_info_t algos[] = {
1108         { void_algo,               "none",  0 },
1109         { co_solve_heuristic,      "heur1", 0 },
1110         { co_solve_heuristic_new,  "heur2", 0 },
1111 #ifdef WITH_JVM
1112         { co_solve_heuristic_java, "heur3", 0 },
1113 #else
1114         { NULL,                    "heur3", 0 },
1115 #endif
1116         { co_solve_heuristic_mst,  "heur4", 0 },
1117 #ifdef WITH_ILP
1118         { co_solve_ilp2,           "ilp",   1 },
1119 #else
1120         { NULL,                    "ilp",   1 },
1121 #endif
1122 #ifdef FIRM_KAPS
1123         { co_solve_heuristic_pbqp, "pbqp",  0 },
1124 #endif
1125         { NULL,                    "",      0 }
1126 };
1127
1128 /*
1129     __  __       _         ____       _
1130    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1131    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1132    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1133    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1134
1135 */
1136
1137 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1138 {
1139         FILE *result;
1140         char buf[1024];
1141         size_t i, n;
1142         char *tu_name;
1143
1144         n = strlen(env->birg->main_env->cup_name);
1145         tu_name = XMALLOCN(char, n + 1);
1146         strcpy(tu_name, env->birg->main_env->cup_name);
1147         for (i = 0; i < n; ++i)
1148                 if (tu_name[i] == '.')
1149                         tu_name[i] = '_';
1150
1151
1152         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1153         xfree(tu_name);
1154         result = fopen(buf, "wt");
1155         if(result == NULL) {
1156                 panic("Couldn't open '%s' for writing.", buf);
1157         }
1158
1159         return result;
1160 }
1161
1162 void co_driver(be_chordal_env_t *cenv)
1163 {
1164         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1165         co_complete_stats_t before, after;
1166         copy_opt_t          *co;
1167         co_algo_t           *algo_func;
1168         int                 was_optimal = 0;
1169
1170         if (algo >= CO_ALGO_LAST)
1171                 return;
1172
1173         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1174
1175         co = new_copy_opt(cenv, cost_func);
1176         co_build_ou_structure(co);
1177         co_build_graph_structure(co);
1178
1179         co_complete_stats(co, &before);
1180
1181         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1182         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1183         be_stat_ev_ull("co_max_costs",    before.max_costs);
1184         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1185         be_stat_ev_ull("co_aff_int",      before.aff_int);
1186
1187         be_stat_ev_ull("co_init_costs",   before.costs);
1188         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1189
1190         if (dump_flags & DUMP_BEFORE) {
1191                 FILE *f = my_open(cenv, "", "-before.dot");
1192                 co_dump_ifg_dot(co, f, style_flags);
1193                 fclose(f);
1194         }
1195
1196         /* if the algo can improve results, provide an initial solution with heur3 */
1197         if (improve && algos[algo].can_improve_existing) {
1198                 co_complete_stats_t stats;
1199
1200                 /* produce a heuristic solution */
1201 #ifdef WITH_JVM
1202                 co_solve_heuristic_java(co);
1203 #else
1204                 co_solve_heuristic(co);
1205 #endif /* WITH_JVM */
1206
1207                 /* do the stats and provide the current costs */
1208                 co_complete_stats(co, &stats);
1209                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1210         }
1211
1212 #ifdef WITH_JVM
1213         /* start the JVM here so that it does not tamper the timing. */
1214         if (algo == CO_ALGO_HEUR3)
1215                 be_java_coal_start_jvm();
1216 #endif /* WITH_JVM */
1217
1218         algo_func = algos[algo].algo;
1219
1220         /* perform actual copy minimization */
1221         ir_timer_reset_and_start(timer);
1222         was_optimal = algo_func(co);
1223         ir_timer_stop(timer);
1224
1225         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1226         be_stat_ev_ull("co_optimal", was_optimal);
1227
1228         if (dump_flags & DUMP_AFTER) {
1229                 FILE *f = my_open(cenv, "", "-after.dot");
1230                 co_dump_ifg_dot(co, f, style_flags);
1231                 fclose(f);
1232         }
1233
1234         co_complete_stats(co, &after);
1235
1236         if (do_stats) {
1237                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1238                 ulong64 evitable          = after.costs     - after.inevit_costs;
1239
1240                 ir_printf("%30F ", cenv->irg);
1241                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1242
1243                 if(optimizable_costs > 0)
1244                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1245                 else
1246                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1247         }
1248
1249         /* Dump the interference graph in Appel's format. */
1250         if (dump_flags & DUMP_APPEL) {
1251                 FILE *f = my_open(cenv, "", ".apl");
1252                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1253                 co_dump_appel_graph(co, f);
1254                 fclose(f);
1255         }
1256
1257         be_stat_ev_ull("co_after_costs", after.costs);
1258         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1259
1260         co_free_graph_structure(co);
1261         co_free_ou_structure(co);
1262         free_copy_opt(co);
1263 }