The big committ:
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include <config.h>
9 #endif
10
11 #ifdef HAVE_ALLOCA_H
12 #include <alloca.h>
13 #endif
14 #ifdef HAVE_MALLOC_H
15 #include <malloc.h>
16 #endif
17
18 #include "execfreq.h"
19 #include "xmalloc.h"
20 #include "debug.h"
21 #include "pmap.h"
22 #include "raw_bitset.h"
23 #include "irgraph.h"
24 #include "irgwalk.h"
25 #include "irprog.h"
26 #include "irloop_t.h"
27 #include "iredges_t.h"
28 #include "phiclass.h"
29 #include "irbitset.h"
30 #include "irphase_t.h"
31 #include "irprintf_t.h"
32
33 #include "bemodule.h"
34 #include "bearch.h"
35 #include "benode_t.h"
36 #include "beutil.h"
37 #include "beifg_t.h"
38 #include "becopyopt_t.h"
39 #include "becopystat.h"
40 #include "belive_t.h"
41 #include "beinsn_t.h"
42 #include "besched_t.h"
43 #include "benodesets.h"
44 #include "bejavacoal.h"
45 #include "bestatevent.h"
46 #include "beirg_t.h"
47 #include "error.h"
48
49 #include <libcore/lc_timing.h>
50 #include <libcore/lc_opts.h>
51 #include <libcore/lc_opts_enum.h>
52
53 #define DUMP_BEFORE 1
54 #define DUMP_AFTER  2
55 #define DUMP_APPEL  4
56 #define DUMP_ALL    2 * DUMP_APPEL - 1
57
58 #define COST_FUNC_FREQ     1
59 #define COST_FUNC_LOOP     2
60 #define COST_FUNC_ALL_ONE  3
61
62 static unsigned dump_flags    = 0;
63 static unsigned style_flags   = 0;
64 static unsigned  do_stats     = 0;
65 static cost_fct_t cost_func   = co_get_costs_exec_freq;
66 static unsigned algo          = CO_ALGO_HEUR2;
67 static int improve            = 1;
68
69 static const lc_opt_enum_mask_items_t dump_items[] = {
70         { "before",  DUMP_BEFORE },
71         { "after",   DUMP_AFTER  },
72         { "appel",   DUMP_APPEL  },
73         { "all",     DUMP_ALL    },
74         { NULL,      0 }
75 };
76
77 static const lc_opt_enum_mask_items_t style_items[] = {
78         { "color",   CO_IFG_DUMP_COLORS },
79         { "labels",  CO_IFG_DUMP_LABELS },
80         { "constr",  CO_IFG_DUMP_CONSTR },
81         { "shape",   CO_IFG_DUMP_SHAPE  },
82         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
83         { NULL,      0 }
84 };
85
86 static const lc_opt_enum_mask_items_t algo_items[] = {
87         { "none",   CO_ALGO_NONE  },
88         { "heur",   CO_ALGO_HEUR  },
89         { "heur2",  CO_ALGO_HEUR2 },
90 #ifdef WITH_JVM
91         { "heur3",  CO_ALGO_HEUR3 },
92 #endif /* WITH_JVM */
93 #ifdef WITH_ILP
94         { "ilp",    CO_ALGO_ILP   },
95 #endif /* WITH_ILP */
96         { NULL,     0 }
97 };
98
99 typedef int (*opt_funcptr)(void);
100
101 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
102         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
103         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
104         { "one",    (opt_funcptr) co_get_costs_all_one },
105         { NULL,     NULL }
106 };
107
108 static lc_opt_enum_mask_var_t dump_var = {
109         &dump_flags, dump_items
110 };
111
112 static lc_opt_enum_mask_var_t style_var = {
113         &style_flags, style_items
114 };
115
116 static lc_opt_enum_mask_var_t algo_var = {
117         &algo, algo_items
118 };
119
120 static lc_opt_enum_func_ptr_var_t cost_func_var = {
121         (opt_funcptr*) &cost_func, cost_func_items
122 };
123
124 static const lc_opt_table_entry_t options[] = {
125         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
126         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
127         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
128         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
129         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
130         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
131         { NULL }
132 };
133
134 /* Insert additional options registration functions here. */
135 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
136 extern void be_co2_register_options(lc_opt_entry_t *grp);
137 extern void be_co3_register_options(lc_opt_entry_t *grp);
138
139 void be_init_copycoal(void)
140 {
141         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
142         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
143         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
144         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
145
146         lc_opt_add_table(co_grp, options);
147 }
148
149 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
150
151 #undef QUICK_AND_DIRTY_HACK
152
153 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
154 {
155         if(env->ifg)
156                 return be_ifg_connected(env->ifg, a, b);
157         else
158                 return values_interfere(env->birg->lv, a, b);
159 }
160
161
162 /******************************************************************************
163     _____                           _
164    / ____|                         | |
165   | |  __  ___ _ __   ___ _ __ __ _| |
166   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
167   | |__| |  __/ | | |  __/ | | (_| | |
168    \_____|\___|_| |_|\___|_|  \__,_|_|
169
170  ******************************************************************************/
171
172 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
173
174
175 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
176 {
177         const char *s1, *s2, *s3;
178         int len;
179         copy_opt_t *co;
180
181         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
182
183         co = xcalloc(1, sizeof(*co));
184         co->cenv      = chordal_env;
185         co->aenv      = chordal_env->birg->main_env->arch_env;
186         co->irg       = chordal_env->irg;
187         co->cls       = chordal_env->cls;
188         co->get_costs = get_costs;
189
190         s1 = get_irp_prog_name();
191         s2 = get_entity_name(get_irg_entity(co->irg));
192         s3 = chordal_env->cls->name;
193         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
194         co->name = xmalloc(len);
195         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
196
197         return co;
198 }
199
200 void free_copy_opt(copy_opt_t *co) {
201         xfree(co->name);
202         free(co);
203 }
204
205 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
206         const arch_register_req_t *req;
207         const arch_register_t *reg;
208
209         if (arch_irn_is(co->aenv, irn, ignore))
210                 return 0;
211
212         reg = arch_get_irn_register(co->aenv, irn);
213         if (arch_register_type_is(reg, ignore))
214                 return 0;
215
216         req = arch_get_register_req(co->aenv, irn, -1);
217         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
218                 return 1;
219
220         return 0;
221 }
222
223 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
224         const ir_edge_t *edge;
225         const arch_register_t *reg;
226
227         assert(0 && "Is buggy and obsolete. Do not use");
228
229         if (arch_irn_is(co->aenv, irn, ignore))
230                 return 0;
231
232         reg = arch_get_irn_register(co->aenv, irn);
233         if (arch_register_type_is(reg, ignore))
234                 return 0;
235
236         foreach_out_edge(irn, edge) {
237                 ir_node *n = edge->src;
238
239                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
240                         const arch_register_req_t *req;
241                         req = arch_get_register_req(co->aenv, n, -1);
242
243                         if(is_Reg_Phi(n) ||
244                            is_Perm(co->aenv, n) ||
245                            (arch_register_req_is(req, should_be_same))) {
246                                 ir_node *other = get_irn_n(irn, req->other_same);
247                                 if(other == irn)
248                                         return 1;
249                         }
250                 }
251         }
252
253         return 0;
254 }
255
256 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
257         int cost = 0;
258         ir_loop *loop;
259         ir_node *root_block = get_nodes_block(root);
260
261         if (is_Phi(root)) {
262                 /* for phis the copies are placed in the corresponding pred-block */
263                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
264         } else {
265                 /* a perm places the copy in the same block as it resides */
266                 loop = get_irn_loop(root_block);
267         }
268         if (loop) {
269                 int d = get_loop_depth(loop);
270                 cost = d*d;
271         }
272         return 1+cost;
273 }
274
275 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
276         int res;
277         ir_node *root_bl = get_nodes_block(root);
278         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
279         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
280
281         /* don't allow values smaller than one. */
282         return res < 1 ? 1 : res;
283 }
284
285
286 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
287         return 1;
288 }
289
290 /******************************************************************************
291    ____        _   _    _       _ _          _____ _
292   / __ \      | | | |  | |     (_) |        / ____| |
293  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
294  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
295  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
296   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
297         | |                                                            __/ |
298         |_|                                                           |___/
299  ******************************************************************************/
300
301 /**
302  * Determines a maximum weighted independent set with respect to
303  * the interference and conflict edges of all nodes in a qnode.
304  */
305 static int ou_max_ind_set_costs(unit_t *ou) {
306         be_chordal_env_t *chordal_env = ou->co->cenv;
307         ir_node **safe, **unsafe;
308         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
309         bitset_t *curr;
310         int max, pos, curr_weight, best_weight = 0;
311
312         /* assign the nodes into two groups.
313          * safe: node has no interference, hence it is in every max stable set.
314          * unsafe: node has an interference
315          */
316         safe = alloca((ou->node_count-1) * sizeof(*safe));
317         safe_costs = 0;
318         safe_count = 0;
319         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
320         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
321         unsafe_count = 0;
322         for(i=1; i<ou->node_count; ++i) {
323                 int is_safe = 1;
324                 for(o=1; o<ou->node_count; ++o) {
325                         if (i==o)
326                                 continue;
327                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
328                                 unsafe_costs[unsafe_count] = ou->costs[i];
329                                 unsafe[unsafe_count] = ou->nodes[i];
330                                 ++unsafe_count;
331                                 is_safe = 0;
332                                 break;
333                         }
334                 }
335                 if (is_safe) {
336                         safe_costs += ou->costs[i];
337                         safe[safe_count++] = ou->nodes[i];
338                 }
339         }
340
341
342         /* now compute the best set out of the unsafe nodes*/
343         if (unsafe_count > MIS_HEUR_TRIGGER) {
344                 bitset_t *best = bitset_alloca(unsafe_count);
345                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
346                 for (i=0; i<unsafe_count; ++i) {
347                         bitset_set(best, i);
348                         /* check if it is a stable set */
349                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
350                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
351                                         bitset_clear(best, i); /* clear the bit and try next one */
352                                         break;
353                                 }
354                 }
355                 /* compute the weight */
356                 bitset_foreach(best, pos)
357                         best_weight += unsafe_costs[pos];
358         } else {
359                 /* Exact Algorithm: Brute force */
360                 curr = bitset_alloca(unsafe_count);
361                 bitset_set_all(curr);
362                 while ((max = bitset_popcnt(curr)) != 0) {
363                         /* check if curr is a stable set */
364                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
365                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
366                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
367                                                         goto no_stable_set;
368
369                         /* if we arrive here, we have a stable set */
370                         /* compute the weigth of the stable set*/
371                         curr_weight = 0;
372                         bitset_foreach(curr, pos)
373                                 curr_weight += unsafe_costs[pos];
374
375                         /* any better ? */
376                         if (curr_weight > best_weight) {
377                                 best_weight = curr_weight;
378                         }
379
380         no_stable_set:
381                         bitset_minus1(curr);
382                 }
383         }
384
385         return safe_costs+best_weight;
386 }
387
388 static void co_collect_units(ir_node *irn, void *env) {
389         copy_opt_t *co = env;
390         unit_t *unit;
391
392         if (!is_curr_reg_class(co, irn))
393                 return;
394         if (!co_is_optimizable_root(co, irn))
395                 return;
396
397         /* Init a new unit */
398         unit = xcalloc(1, sizeof(*unit));
399         unit->co = co;
400         unit->node_count = 1;
401         INIT_LIST_HEAD(&unit->queue);
402
403         /* Phi with some/all of its arguments */
404         if (is_Reg_Phi(irn)) {
405                 int i, arity;
406
407                 /* init */
408                 arity = get_irn_arity(irn);
409                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
410                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
411                 unit->nodes[0] = irn;
412
413                 /* fill */
414                 for (i=0; i<arity; ++i) {
415                         int o, arg_pos;
416                         ir_node *arg = get_irn_n(irn, i);
417
418                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
419                         if (arg == irn)
420                                 continue;
421                         if (nodes_interfere(co->cenv, irn, arg)) {
422                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
423                                 continue;
424                         }
425
426                         /* Else insert the argument of the phi to the members of this ou */
427                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
428
429                         /* Check if arg has occurred at a prior position in the arg/list */
430                         arg_pos = 0;
431                         for (o=0; o<unit->node_count; ++o)
432                                 if (unit->nodes[o] == arg) {
433                                         arg_pos = o;
434                                         break;
435                                 }
436
437                         if (!arg_pos) { /* a new argument */
438                                 /* insert node, set costs */
439                                 unit->nodes[unit->node_count] = arg;
440                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
441                                 unit->node_count++;
442                         } else { /* arg has occured before in same phi */
443                                 /* increase costs for existing arg */
444                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
445                         }
446                 }
447                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
448                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
449         } else
450
451         /* Proj of a perm with corresponding arg */
452         if (is_Perm_Proj(co->aenv, irn)) {
453                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
454                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
455                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
456                 unit->node_count = 2;
457                 unit->nodes[0] = irn;
458                 unit->nodes[1] = get_Perm_src(irn);
459                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
460         } else {
461                 const arch_register_req_t *req =
462                         arch_get_register_req(co->aenv, irn, -1);
463
464                 /* Src == Tgt of a 2-addr-code instruction */
465                 if (is_2addr_code(req)) {
466                         ir_node *other = get_irn_n(irn, req->other_same);
467                         if (!nodes_interfere(co->cenv, irn, other)) {
468                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
469                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
470                                 unit->node_count = 2;
471                                 unit->nodes[0] = irn;
472                                 unit->nodes[1] = other;
473                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
474                         }
475                 } else {
476                         assert(0 && "This is not an optimizable node!");
477                 }
478         }
479
480         /* Insert the new unit at a position according to its costs */
481         if (unit->node_count > 1) {
482                 int i;
483                 struct list_head *tmp;
484
485                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
486                 for(i=1; i<unit->node_count; ++i) {
487                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
488                         unit->all_nodes_costs += unit->costs[i];
489                 }
490
491                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
492                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
493                 /* Insert the new ou according to its sort_key */
494                 tmp = &co->units;
495                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
496                         tmp = tmp->next;
497                 list_add(&unit->units, tmp);
498         } else {
499                 free(unit);
500         }
501 }
502
503 #ifdef QUICK_AND_DIRTY_HACK
504
505 static int compare_ous(const void *k1, const void *k2) {
506         const unit_t *u1 = *((const unit_t **) k1);
507         const unit_t *u2 = *((const unit_t **) k2);
508         int i, o, u1_has_constr, u2_has_constr;
509         arch_register_req_t req;
510         const arch_env_t *aenv = u1->co->aenv;
511
512         /* Units with constraints come first */
513         u1_has_constr = 0;
514         for (i=0; i<u1->node_count; ++i) {
515                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
516                 if (arch_register_req_is(&req, limited)) {
517                         u1_has_constr = 1;
518                         break;
519                 }
520         }
521
522         u2_has_constr = 0;
523         for (i=0; i<u2->node_count; ++i) {
524                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
525                 if (arch_register_req_is(&req, limited)) {
526                         u2_has_constr = 1;
527                         break;
528                 }
529         }
530
531         if (u1_has_constr != u2_has_constr)
532                 return u2_has_constr - u1_has_constr;
533
534         /* Now check, whether the two units are connected */
535 #if 0
536         for (i=0; i<u1->node_count; ++i)
537                 for (o=0; o<u2->node_count; ++o)
538                         if (u1->nodes[i] == u2->nodes[o])
539                                 return 0;
540 #endif
541
542         /* After all, the sort key decides. Greater keys come first. */
543         return u2->sort_key - u1->sort_key;
544
545 }
546
547 /**
548  * Sort the ou's according to constraints and their sort_key
549  */
550 static void co_sort_units(copy_opt_t *co) {
551         int i, count = 0, costs;
552         unit_t *ou, **ous;
553
554         /* get the number of ous, remove them form the list and fill the array */
555         list_for_each_entry(unit_t, ou, &co->units, units)
556                 count++;
557         ous = alloca(count * sizeof(*ous));
558
559         costs = co_get_max_copy_costs(co);
560
561         i = 0;
562         list_for_each_entry(unit_t, ou, &co->units, units)
563                 ous[i++] = ou;
564
565         INIT_LIST_HEAD(&co->units);
566
567         assert(count == i && list_empty(&co->units));
568
569         for (i=0; i<count; ++i)
570                 ir_printf("%+F\n", ous[i]->nodes[0]);
571
572         qsort(ous, count, sizeof(*ous), compare_ous);
573
574         ir_printf("\n\n");
575         for (i=0; i<count; ++i)
576                 ir_printf("%+F\n", ous[i]->nodes[0]);
577
578         /* reinsert into list in correct order */
579         for (i=0; i<count; ++i)
580                 list_add_tail(&ous[i]->units, &co->units);
581
582         assert(costs == co_get_max_copy_costs(co));
583 }
584 #endif
585
586 void co_build_ou_structure(copy_opt_t *co) {
587         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
588         INIT_LIST_HEAD(&co->units);
589         irg_walk_graph(co->irg, co_collect_units, NULL, co);
590 #ifdef QUICK_AND_DIRTY_HACK
591         co_sort_units(co);
592 #endif
593 }
594
595 void co_free_ou_structure(copy_opt_t *co) {
596         unit_t *curr, *tmp;
597         ASSERT_OU_AVAIL(co);
598         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
599                 xfree(curr->nodes);
600                 xfree(curr->costs);
601                 xfree(curr);
602         }
603         co->units.next = NULL;
604 }
605
606 /* co_solve_heuristic() is implemented in becopyheur.c */
607
608 int co_get_max_copy_costs(const copy_opt_t *co) {
609         int i, res = 0;
610         unit_t *curr;
611
612         ASSERT_OU_AVAIL(co);
613
614         list_for_each_entry(unit_t, curr, &co->units, units) {
615                 res += curr->inevitable_costs;
616                 for (i=1; i<curr->node_count; ++i)
617                         res += curr->costs[i];
618         }
619         return res;
620 }
621
622 int co_get_inevit_copy_costs(const copy_opt_t *co) {
623         int res = 0;
624         unit_t *curr;
625
626         ASSERT_OU_AVAIL(co);
627
628         list_for_each_entry(unit_t, curr, &co->units, units)
629                 res += curr->inevitable_costs;
630         return res;
631 }
632
633 int co_get_copy_costs(const copy_opt_t *co) {
634         int i, res = 0;
635         unit_t *curr;
636
637         ASSERT_OU_AVAIL(co);
638
639         list_for_each_entry(unit_t, curr, &co->units, units) {
640                 int root_col = get_irn_col(co, curr->nodes[0]);
641                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
642                 res += curr->inevitable_costs;
643                 for (i=1; i<curr->node_count; ++i) {
644                         int arg_col = get_irn_col(co, curr->nodes[i]);
645                         if (root_col != arg_col) {
646                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
647                                 res += curr->costs[i];
648                         }
649                 }
650         }
651         return res;
652 }
653
654 int co_get_lower_bound(const copy_opt_t *co) {
655         int res = 0;
656         unit_t *curr;
657
658         ASSERT_OU_AVAIL(co);
659
660         list_for_each_entry(unit_t, curr, &co->units, units)
661                 res += curr->inevitable_costs + curr->min_nodes_costs;
662         return res;
663 }
664
665 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
666 {
667         bitset_t *seen = bitset_irg_malloc(co->irg);
668         affinity_node_t *an;
669
670         memset(stat, 0, sizeof(stat[0]));
671
672         /* count affinity edges. */
673         co_gs_foreach_aff_node(co, an) {
674                 neighb_t *neigh;
675                 stat->aff_nodes += 1;
676                 bitset_add_irn(seen, an->irn);
677                 co_gs_foreach_neighb(an, neigh) {
678                         if(!bitset_contains_irn(seen, neigh->irn)) {
679                                 stat->aff_edges += 1;
680                                 stat->max_costs += neigh->costs;
681
682                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
683                                         stat->costs += neigh->costs;
684                                         stat->unsatisfied_edges += 1;
685                                 }
686
687                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
688                                         stat->aff_int += 1;
689                                         stat->inevit_costs += neigh->costs;
690                                 }
691
692                         }
693                 }
694         }
695
696         bitset_free(seen);
697 }
698
699 /******************************************************************************
700    _____                 _        _____ _
701   / ____|               | |      / ____| |
702  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
703  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
704  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
705   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
706                   | |                                       __/ |
707                   |_|                                      |___/
708  ******************************************************************************/
709
710 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
711         const affinity_node_t *n1 = k1;
712         const affinity_node_t *n2 = k2;
713
714         return (n1->irn != n2->irn);
715 }
716
717 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
718         affinity_node_t new_node, *node;
719         neighb_t new_nbr, *nbr;
720         int allocnew;
721
722         new_node.irn        = n1;
723         new_node.degree     = 0;
724         new_node.neighbours = NULL;
725         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
726
727         allocnew = 1;
728         for (nbr = node->neighbours; nbr; nbr = nbr->next)
729                 if (nbr->irn == n2) {
730                         allocnew = 0;
731                         break;
732                 }
733
734         /* if we did not find n2 in n1's neighbourhood insert it */
735         if (allocnew) {
736                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
737                 nbr = obstack_finish(&co->obst);
738                 nbr->irn   = n2;
739                 nbr->costs = 0;
740                 nbr->next  = node->neighbours;
741                 node->neighbours = nbr;
742                 node->degree++;
743         }
744
745         /* now nbr points to n1's neighbour-entry of n2 */
746         nbr->costs += costs;
747 }
748
749 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
750         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
751                 add_edge(co, n1, n2, costs);
752                 add_edge(co, n2, n1, costs);
753         }
754 }
755
756 static void build_graph_walker(ir_node *irn, void *env) {
757         copy_opt_t *co = env;
758         int pos, max;
759         const arch_register_t *reg;
760
761         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
762                 return;
763
764         reg = arch_get_irn_register(co->aenv, irn);
765         if (arch_register_type_is(reg, ignore))
766                 return;
767
768         /* Phis */
769         if (is_Reg_Phi(irn))
770                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
771                         ir_node *arg = get_irn_n(irn, pos);
772                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
773                 }
774
775         /* Perms */
776         else if (is_Perm_Proj(co->aenv, irn)) {
777                 ir_node *arg = get_Perm_src(irn);
778                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
779         }
780
781         /* 2-address code */
782         else {
783                 const arch_register_req_t *req =
784                         arch_get_register_req(co->aenv, irn, -1);
785                 if (is_2addr_code(req)) {
786                         ir_node *other = get_irn_n(irn, req->other_same);
787                         add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
788                 }
789         }
790 }
791
792 void co_build_graph_structure(copy_opt_t *co) {
793         obstack_init(&co->obst);
794         co->nodes = new_set(compare_affinity_node_t, 32);
795
796         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
797 }
798
799 void co_free_graph_structure(copy_opt_t *co) {
800         ASSERT_GS_AVAIL(co);
801
802         del_set(co->nodes);
803         obstack_free(&co->obst, NULL);
804         co->nodes = NULL;
805 }
806
807 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
808
809 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
810         affinity_node_t new_node, *n;
811
812         ASSERT_GS_AVAIL(co);
813
814         new_node.irn = irn;
815         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
816         if (n) {
817                 return (n->degree > 0);
818         } else
819                 return 0;
820 }
821
822 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
823 {
824         be_ifg_t *ifg  = co->cenv->ifg;
825         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
826
827         ir_node *irn;
828         void *it, *nit;
829         int i, n, n_regs;
830
831         n_regs = 0;
832         for(i = 0; i < co->cls->n_regs; ++i) {
833                 const arch_register_t *reg = &co->cls->regs[i];
834                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
835         }
836
837         /*
838          * n contains the first node number.
839          * the values below n are the pre-colored register nodes
840          */
841
842         it  = be_ifg_nodes_iter_alloca(ifg);
843         nit = be_ifg_neighbours_iter_alloca(ifg);
844
845         n = n_regs;
846         be_ifg_foreach_node(ifg, it, irn) {
847                 if(!arch_irn_is(co->aenv, irn, ignore))
848                         set_irn_link(irn, INT_TO_PTR(n++));
849         }
850
851         fprintf(f, "%d %d\n", n, n_regs);
852
853         be_ifg_foreach_node(ifg, it, irn) {
854                 if(!arch_irn_is(co->aenv, irn, ignore)) {
855                         int idx            = PTR_TO_INT(get_irn_link(irn));
856                         affinity_node_t *a = get_affinity_info(co, irn);
857
858                         const arch_register_req_t *req;
859                         ir_node *adj;
860
861                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
862                         if(arch_register_req_is(req, limited)) {
863                                 for(i = 0; i < co->cls->n_regs; ++i) {
864                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
865                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
866                                 }
867                         }
868
869
870                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
871                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
872                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
873                                         if(idx < adj_idx)
874                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
875                                 }
876                         }
877
878                         if(a) {
879                                 neighb_t *n;
880
881                                 co_gs_foreach_neighb(a, n) {
882                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
883                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
884                                                 if(idx < n_idx)
885                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
886                                         }
887                                 }
888                         }
889                 }
890         }
891 }
892
893 typedef struct _appel_clique_walker_t {
894         phase_t ph;
895         const copy_opt_t *co;
896         int curr_nr;
897         int node_count;
898         FILE *f;
899         int dumb;
900         int *color_map;
901         struct obstack obst;
902 } appel_clique_walker_t;
903
904 typedef struct _appel_block_info_t {
905         int *live_end_nr;
906         int *live_in_nr;
907         int *phi_nr;
908         ir_node **live_end;
909         ir_node **live_in;
910         ir_node **phi;
911         int n_live_end;
912         int n_live_in;
913         int n_phi;
914 } appel_block_info_t;
915
916 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
917 {
918 #if 0
919         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
920         int res = (int) freq;
921         return res == 0 ? 1 : res;
922 #else
923         ir_loop *loop = get_irn_loop(bl);
924         if(loop) {
925                 int d = get_loop_depth(loop);
926                 return 1 + d * d;
927         }
928         return 1;
929 #endif
930 }
931
932 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
933 {
934         appel_block_info_t *res = NULL;
935
936         if(is_Block(irn)) {
937                 appel_clique_walker_t *d = (void *) phase;
938                 res = phase_alloc(phase, sizeof(res[0]));
939                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
940                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
941                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
942                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
943                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
944                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
945         }
946
947         return res;
948 }
949
950 typedef struct _insn_list_t {
951         be_insn_t *insn;
952         struct list_head list;
953 } insn_list_t;
954
955 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
956 {
957         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
958         int i;
959
960         for(i = 0; i < bli->n_live_end; ++i)
961                 if(bli->live_end[i] == irn)
962                         return bli->live_end_nr[i];
963
964         return -1;
965 }
966
967 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
968 {
969         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
970         ir_node *irn;
971         int n_live;
972         int j;
973
974         n_live = 0;
975         foreach_pset(live, irn)
976                 live_arr[n_live++] = irn;
977
978         /* dump the live after clique */
979         if(!env->dumb) {
980                 for(j = 0; j < n_live; ++j) {
981                         int k;
982
983                         for(k = j + 1; k < n_live; ++k) {
984                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
985                         }
986                         fprintf(env->f, "\n");
987                 }
988         }
989
990         /* dump the affinities */
991         for(j = 0; !env->dumb && j < n_live; ++j) {
992                 ir_node *irn = live_arr[j];
993                 int old_nr = PTR_TO_INT(get_irn_link(irn));
994
995                 /* if the node was already live in the last insn dump the affinity */
996                 if(old_nr > start_nr) {
997                         int weight = appel_aff_weight(env, bl);
998                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
999                 }
1000         }
1001
1002         /* set the current numbers into the link field. */
1003         for(j = 0; j < n_live; ++j) {
1004                 ir_node *irn = live_arr[j];
1005                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1006         }
1007
1008         return curr_nr + n_live;
1009 }
1010
1011 static void appel_walker(ir_node *bl, void *data)
1012 {
1013         appel_clique_walker_t *env = data;
1014         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1015         struct obstack *obst       = &env->obst;
1016         void *base                 = obstack_base(obst);
1017         pset *live                 = pset_new_ptr_default();
1018         be_lv_t *lv                = env->co->cenv->birg->lv;
1019
1020         int n_insns  = 0;
1021         int n_nodes  = 0;
1022         int start_nr = env->curr_nr;
1023         int curr_nr  = start_nr;
1024
1025         be_insn_env_t insn_env;
1026         int i, j;
1027         ir_node *irn;
1028         be_insn_t **insns;
1029
1030         insn_env.aenv = env->co->aenv;
1031         insn_env.cls  = env->co->cls;
1032         insn_env.obst = obst;
1033         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1034
1035         /* Guess how many insns will be in this block. */
1036         sched_foreach(bl, irn)
1037                 n_nodes++;
1038
1039         bli->n_phi = 0;
1040         insns = malloc(n_nodes * sizeof(insns[0]));
1041
1042         /* Put all insns in an array. */
1043         irn = sched_first(bl);
1044         while(!sched_is_end(irn)) {
1045                 be_insn_t *insn;
1046                 insn = be_scan_insn(&insn_env, irn);
1047                 insns[n_insns++] = insn;
1048                 irn = insn->next_insn;
1049         }
1050
1051         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
1052         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1053
1054         /* Generate the bad and ugly. */
1055         for(i = n_insns - 1; i >= 0; --i) {
1056                 be_insn_t *insn = insns[i];
1057
1058                 /* The first live set has to be saved in the block border set. */
1059                 if(i == n_insns - 1) {
1060                         j = 0;
1061                         foreach_pset(live, irn) {
1062                                 bli->live_end[j]    = irn;
1063                                 bli->live_end_nr[j] = curr_nr + j;
1064                                 ++j;
1065                         }
1066                         bli->n_live_end = j;
1067                 }
1068
1069                 if(!env->dumb) {
1070                         for(j = 0; j < insn->use_start; ++j) {
1071                                 ir_node *op   = insn->ops[j].carrier;
1072                                 bitset_t *adm = insn->ops[j].regs;
1073                                 int k;
1074                                 int nr;
1075
1076                                 if(!insn->ops[j].has_constraints)
1077                                         continue;
1078
1079                                 nr = 0;
1080                                 foreach_pset(live, irn) {
1081                                         if(irn == op) {
1082                                                 pset_break(live);
1083                                                 break;
1084                                         }
1085                                         ++nr;
1086                                 }
1087
1088                                 assert(nr < pset_count(live));
1089
1090                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1091                                         int mapped_col = env->color_map[k];
1092                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1093                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1094                                 }
1095                         }
1096                 }
1097
1098                 /* dump the clique and update the stuff. */
1099                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1100
1101                 /* remove all defs. */
1102                 for(j = 0; j < insn->use_start; ++j)
1103                         pset_remove_ptr(live, insn->ops[j].carrier);
1104
1105                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1106                         bli->phi[bli->n_phi]    = insn->irn;
1107                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1108                         bli->n_phi++;
1109                 }
1110
1111                 /* add all uses */
1112                 else
1113                         for(j = insn->use_start; j < insn->n_ops; ++j)
1114                                 pset_insert_ptr(live, insn->ops[j].carrier);
1115         }
1116
1117         /* print the start clique. */
1118         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1119
1120         i = 0;
1121         foreach_pset(live, irn) {
1122                 bli->live_in[i]    = irn;
1123                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1124                 ++i;
1125         }
1126         bli->n_live_in = i;
1127
1128         del_pset(live);
1129         free(insns);
1130         obstack_free(obst, base);
1131         env->curr_nr = curr_nr;
1132 }
1133
1134 static void appel_inter_block_aff(ir_node *bl, void *data)
1135 {
1136         appel_clique_walker_t *env = data;
1137         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1138
1139         int i, j, n;
1140
1141         for(i = 0; i < bli->n_live_in; ++i) {
1142                 ir_node *irn = bli->live_in[i];
1143
1144                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1145                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1146
1147                         int nr = appel_get_live_end_nr(env, pred, irn);
1148                         assert(nr >= 0);
1149                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1150                 }
1151         }
1152
1153         for(i = 0; i < bli->n_phi; ++i) {
1154                 ir_node *irn = bli->phi[i];
1155
1156                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1157                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1158                         ir_node *op = get_irn_n(irn, j);
1159
1160                         int nr = appel_get_live_end_nr(env, pred, op);
1161                         assert(nr >= 0);
1162                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1163                 }
1164         }
1165
1166 }
1167
1168 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1169 {
1170         int i;
1171         int n_colors;
1172         appel_clique_walker_t env;
1173         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1174         be_lv_t *lv = co->cenv->birg->lv;
1175
1176         be_liveness_recompute(lv);
1177         obstack_init(&env.obst);
1178         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1179         env.curr_nr = co->cls->n_regs;
1180         env.co = co;
1181         env.f = f;
1182
1183         bitset_copy(adm, co->cenv->ignore_colors);
1184         bitset_flip_all(adm);
1185
1186         /* Make color map. */
1187         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1188         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1189                 const arch_register_t *reg = &co->cls->regs[i];
1190                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1191         }
1192
1193         env.dumb = 1;
1194         env.curr_nr = n_colors;
1195         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1196         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1197
1198         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1199
1200         /* make the first k nodes interfere */
1201         for(i = 0; i < n_colors; ++i) {
1202                 int j;
1203                 for(j = i + 1; j < n_colors; ++j)
1204                         fprintf(f, "%d %d -1 ", i, j);
1205                 fprintf(f, "\n");
1206         }
1207
1208         env.dumb = 0;
1209         env.curr_nr = n_colors;
1210         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1211         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1212         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1213         obstack_free(&env.obst, NULL);
1214 }
1215
1216 /*
1217          ___ _____ ____   ____   ___ _____   ____                        _
1218         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1219          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1220          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1221         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1222                                                                   |_|            |___/
1223 */
1224
1225 static const char *get_dot_color_name(int col)
1226 {
1227         static const char *names[] = {
1228                 "blue",
1229                 "red",
1230                 "green",
1231                 "yellow",
1232                 "cyan",
1233                 "magenta",
1234                 "orange",
1235                 "chocolate",
1236                 "beige",
1237                 "navy",
1238                 "darkgreen",
1239                 "darkred",
1240                 "lightPink",
1241                 "chartreuse",
1242                 "lightskyblue",
1243                 "linen",
1244                 "pink",
1245                 "lightslateblue",
1246                 "mintcream",
1247                 "red",
1248                 "darkolivegreen",
1249                 "mediumblue",
1250                 "mistyrose",
1251                 "salmon",
1252                 "darkseagreen",
1253                 "mediumslateblue"
1254                 "moccasin",
1255                 "tomato",
1256                 "forestgreen",
1257                 "darkturquoise",
1258                 "palevioletred"
1259         };
1260
1261         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1262 }
1263
1264 typedef struct _co_ifg_dump_t {
1265         const copy_opt_t *co;
1266         unsigned flags;
1267 } co_ifg_dump_t;
1268
1269 static void ifg_dump_graph_attr(FILE *f, void *self)
1270 {
1271         fprintf(f, "overlap=scale");
1272 }
1273
1274 static int ifg_is_dump_node(void *self, ir_node *irn)
1275 {
1276         co_ifg_dump_t *cod = self;
1277         return !arch_irn_is(cod->co->aenv, irn, ignore);
1278 }
1279
1280 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1281 {
1282         co_ifg_dump_t *env         = self;
1283         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1284         const arch_register_req_t *req;
1285         int limited;
1286
1287         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1288         limited = arch_register_req_is(req, limited);
1289
1290         if(env->flags & CO_IFG_DUMP_LABELS) {
1291                 ir_fprintf(f, "label=\"%+F", irn);
1292
1293 #if 0
1294                 // TODO fix this...
1295                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1296                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1297                         req.limited(req.limited_env, bs);
1298                         ir_fprintf(f, "\\n%B", bs);
1299                 }
1300 #endif
1301                 ir_fprintf(f, "\" ");
1302         } else {
1303                 fprintf(f, "label=\"\" shape=point " );
1304         }
1305
1306         if(env->flags & CO_IFG_DUMP_SHAPE)
1307                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1308
1309         if(env->flags & CO_IFG_DUMP_COLORS)
1310                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1311 }
1312
1313 static void ifg_dump_at_end(FILE *file, void *self)
1314 {
1315         co_ifg_dump_t *env = self;
1316         affinity_node_t *a;
1317
1318         co_gs_foreach_aff_node(env->co, a) {
1319                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1320                 unsigned aidx = get_irn_idx(a->irn);
1321                 neighb_t *n;
1322
1323                 co_gs_foreach_neighb(a, n) {
1324                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1325                         unsigned nidx = get_irn_idx(n->irn);
1326
1327                         if(aidx < nidx) {
1328                                 const char *color = nr == ar ? "blue" : "red";
1329                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1330                                 if(env->flags & CO_IFG_DUMP_LABELS)
1331                                         fprintf(file, "label=\"%d\" ", n->costs);
1332                                 if(env->flags & CO_IFG_DUMP_COLORS)
1333                                         fprintf(file, "color=%s ", color);
1334                                 else
1335                                         fprintf(file, "style=dotted");
1336                                 fprintf(file, "];\n");
1337                         }
1338                 }
1339         }
1340 }
1341
1342
1343 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1344         ifg_is_dump_node,
1345         ifg_dump_graph_attr,
1346         ifg_dump_node_attr,
1347         NULL,
1348         NULL,
1349         ifg_dump_at_end
1350 };
1351
1352
1353
1354 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1355 {
1356         co_ifg_dump_t cod;
1357
1358         cod.co    = co;
1359         cod.flags = flags;
1360         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1361 }
1362
1363
1364 void co_solve_park_moon(copy_opt_t *opt)
1365 {
1366
1367 }
1368
1369 static int void_algo(copy_opt_t *co)
1370 {
1371         return 0;
1372 }
1373
1374 /*
1375                 _    _                  _ _   _
1376            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1377           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1378          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1379         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1380                            |___/
1381 */
1382
1383 typedef struct {
1384         co_algo_t *algo;
1385         const char *name;
1386         int can_improve_existing;
1387 } co_algo_info_t;
1388
1389 static co_algo_info_t algos[] = {
1390         { void_algo,                "none",  0 },
1391         { co_solve_heuristic,       "heur1", 0 },
1392         { co_solve_heuristic_new,   "heur2", 0 },
1393         { co_solve_heuristic_java,  "heur3", 0 },
1394 #ifdef WITH_ILP
1395         { co_solve_ilp2,            "ilp",   1 },
1396 #endif
1397         { NULL,                     "",      0 }
1398 };
1399
1400 /*
1401     __  __       _         ____       _
1402    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1403    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1404    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1405    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1406
1407 */
1408
1409 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1410 {
1411         FILE *result;
1412         char buf[1024];
1413
1414         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1415         result = fopen(buf, "wt");
1416         if(result == NULL) {
1417                 panic("Couldn't open '%s' for writing.", buf);
1418         }
1419
1420         return result;
1421 }
1422
1423 void co_driver(be_chordal_env_t *cenv)
1424 {
1425         lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
1426         co_complete_stats_t before, after;
1427         copy_opt_t *co;
1428         co_algo_t  *algo_func;
1429         int was_optimal = 0;
1430
1431         if(algo < 0 || algo >= CO_ALGO_LAST)
1432                 return;
1433
1434         co = new_copy_opt(cenv, cost_func);
1435         co_build_ou_structure(co);
1436         co_build_graph_structure(co);
1437
1438         co_complete_stats(co, &before);
1439
1440         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1441         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1442         be_stat_ev_ull("co_max_costs",    before.max_costs);
1443         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1444         be_stat_ev_ull("co_aff_int",      before.aff_int);
1445
1446         be_stat_ev_ull("co_init_costs",   before.costs);
1447         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1448
1449         /* Dump the interference graph in Appel's format. */
1450         if(dump_flags & DUMP_APPEL) {
1451                 FILE *f = my_open(cenv, "", ".apl");
1452                 co_dump_appel_graph(co, f);
1453                 fclose(f);
1454         }
1455
1456         if(dump_flags & DUMP_BEFORE) {
1457                 FILE *f = my_open(cenv, "", "-before.dot");
1458                 co_dump_ifg_dot(co, f, style_flags);
1459                 fclose(f);
1460         }
1461
1462         /* if the algo can improve results, provide an initial solution with heur3 */
1463         if(improve && algos[algo].can_improve_existing) {
1464                 co_complete_stats_t stats;
1465
1466                 /* produce a heuristical solution */
1467                 co_solve_heuristic_java(co);
1468
1469                 /* do the stats and provide the current costs */
1470                 co_complete_stats(co, &stats);
1471                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1472         }
1473
1474 #ifdef WITH_JVM
1475         /* start the JVM here so that it does not tamper the timing. */
1476         if(algo == CO_ALGO_HEUR3)
1477                 be_java_coal_start_jvm();
1478 #endif
1479
1480         algo_func = algos[algo].algo;
1481
1482         lc_timer_reset_and_start(timer);
1483
1484         was_optimal = algo_func(co);
1485
1486         lc_timer_stop(timer);
1487         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1488
1489         be_stat_ev_ull("co_optimal", was_optimal);
1490
1491         if(dump_flags & DUMP_AFTER) {
1492                 FILE *f = my_open(cenv, "", "-after.dot");
1493                 co_dump_ifg_dot(co, f, style_flags);
1494                 fclose(f);
1495         }
1496
1497         co_complete_stats(co, &after);
1498
1499         if(do_stats) {
1500                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1501                 ulong64 evitable          = after.costs     - after.inevit_costs;
1502
1503                 ir_printf("%30F ", cenv->irg);
1504                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1505
1506                 if(optimizable_costs > 0)
1507                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1508                 else
1509                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1510         }
1511
1512         be_stat_ev_ull("co_after_costs", after.costs);
1513         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1514
1515         co_free_graph_structure(co);
1516         co_free_ou_structure(co);
1517         free_copy_opt(co);
1518 }