Updated for new architecture
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10
11 #ifdef HAVE_ALLOCA_H
12 #include <alloca.h>
13 #endif
14 #ifdef HAVE_MALLOC_H
15 #include <malloc.h>
16 #endif
17
18 #include "execfreq.h"
19 #include "xmalloc.h"
20 #include "debug.h"
21 #include "pmap.h"
22 #include "irgraph.h"
23 #include "irgwalk.h"
24 #include "irprog.h"
25 #include "irloop_t.h"
26 #include "iredges_t.h"
27 #include "phiclass.h"
28 #include "irbitset.h"
29 #include "irphase_t.h"
30 #include "irprintf_t.h"
31
32 #include "bemodule.h"
33 #include "bearch.h"
34 #include "benode_t.h"
35 #include "beutil.h"
36 #include "beifg_t.h"
37 #include "becopyopt_t.h"
38 #include "becopystat.h"
39 #include "belive_t.h"
40 #include "beinsn_t.h"
41 #include "besched_t.h"
42 #include "benodesets.h"
43 #include "bejavacoal.h"
44 #include "bestatevent.h"
45 #include "beirg_t.h"
46 #include "error.h"
47
48 #ifdef WITH_LIBCORE
49 #include <libcore/lc_timing.h>
50 #include <libcore/lc_opts.h>
51 #endif /* WITH_LIBCORE */
52
53 #define DUMP_BEFORE 1
54 #define DUMP_AFTER  2
55 #define DUMP_APPEL  4
56 #define DUMP_ALL    2 * DUMP_APPEL - 1
57
58 #define COST_FUNC_FREQ     1
59 #define COST_FUNC_LOOP     2
60 #define COST_FUNC_ALL_ONE  3
61
62 static unsigned dump_flags    = 0;
63 static unsigned style_flags   = 0;
64 static unsigned  do_stats     = 0;
65 static cost_fct_t cost_func   = co_get_costs_exec_freq;
66 static unsigned algo          = CO_ALGO_HEUR2;
67 static int improve            = 1;
68
69 #ifdef WITH_LIBCORE
70 static const lc_opt_enum_mask_items_t dump_items[] = {
71         { "before",  DUMP_BEFORE },
72         { "after",   DUMP_AFTER  },
73         { "appel",   DUMP_APPEL  },
74         { "all",     DUMP_ALL    },
75         { NULL,      0 }
76 };
77
78 static const lc_opt_enum_mask_items_t style_items[] = {
79         { "color",   CO_IFG_DUMP_COLORS },
80         { "labels",  CO_IFG_DUMP_LABELS },
81         { "constr",  CO_IFG_DUMP_CONSTR },
82         { "shape",   CO_IFG_DUMP_SHAPE  },
83         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
84         { NULL,      0 }
85 };
86
87 static const lc_opt_enum_mask_items_t algo_items[] = {
88         { "none",   CO_ALGO_NONE  },
89         { "heur",   CO_ALGO_HEUR  },
90         { "heur2",  CO_ALGO_HEUR2 },
91 #ifdef WITH_JVM
92         { "heur3",  CO_ALGO_HEUR3 },
93 #endif /* WITH_JVM */
94 #ifdef WITH_ILP
95         { "ilp",    CO_ALGO_ILP   },
96 #endif /* WITH_ILP */
97         { NULL,     0 }
98 };
99
100 typedef int (*opt_funcptr)(void);
101
102 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
103         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
104         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
105         { "one",    (opt_funcptr) co_get_costs_all_one },
106         { NULL,     NULL }
107 };
108
109 static lc_opt_enum_mask_var_t dump_var = {
110         &dump_flags, dump_items
111 };
112
113 static lc_opt_enum_mask_var_t style_var = {
114         &style_flags, style_items
115 };
116
117 static lc_opt_enum_mask_var_t algo_var = {
118         &algo, algo_items
119 };
120
121 static lc_opt_enum_func_ptr_var_t cost_func_var = {
122         (opt_funcptr*) &cost_func, cost_func_items
123 };
124
125 static const lc_opt_table_entry_t options[] = {
126         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
127         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
128         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
129         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
130         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
131         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
132         { NULL }
133 };
134
135 /* Insert additional options registration functions here. */
136 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
137 extern void be_co2_register_options(lc_opt_entry_t *grp);
138 extern void be_co3_register_options(lc_opt_entry_t *grp);
139
140 void be_init_copycoal(void)
141 {
142         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
143         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
144         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
145         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
146
147         lc_opt_add_table(co_grp, options);
148 }
149
150 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
151 #endif
152
153
154 #undef QUICK_AND_DIRTY_HACK
155
156 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
157 {
158         if(env->ifg)
159                 return be_ifg_connected(env->ifg, a, b);
160         else
161                 return values_interfere(env->birg->lv, a, b);
162 }
163
164
165 /******************************************************************************
166     _____                           _
167    / ____|                         | |
168   | |  __  ___ _ __   ___ _ __ __ _| |
169   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
170   | |__| |  __/ | | |  __/ | | (_| | |
171    \_____|\___|_| |_|\___|_|  \__,_|_|
172
173  ******************************************************************************/
174
175 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
176
177
178 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
179 {
180         const char *s1, *s2, *s3;
181         int len;
182         copy_opt_t *co;
183
184         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
185
186         co = xcalloc(1, sizeof(*co));
187         co->cenv      = chordal_env;
188         co->aenv      = chordal_env->birg->main_env->arch_env;
189         co->irg       = chordal_env->irg;
190         co->cls       = chordal_env->cls;
191         co->get_costs = get_costs;
192
193         s1 = get_irp_prog_name();
194         s2 = get_entity_name(get_irg_entity(co->irg));
195         s3 = chordal_env->cls->name;
196         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
197         co->name = xmalloc(len);
198         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
199
200         return co;
201 }
202
203 void free_copy_opt(copy_opt_t *co) {
204         xfree(co->name);
205         free(co);
206 }
207
208 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
209         arch_register_req_t req;
210         const arch_register_t *reg;
211
212         if (arch_irn_is(co->aenv, irn, ignore))
213                 return 0;
214
215         reg = arch_get_irn_register(co->aenv, irn);
216         if (arch_register_type_is(reg, ignore))
217                 return 0;
218
219         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req))
220                 return 1;
221
222         return 0;
223 }
224
225 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
226         const ir_edge_t *edge;
227         const arch_register_t *reg;
228
229         assert(0 && "Is buggy and obsolete. Do not use");
230
231         if (arch_irn_is(co->aenv, irn, ignore))
232                 return 0;
233
234         reg = arch_get_irn_register(co->aenv, irn);
235         if (arch_register_type_is(reg, ignore))
236                 return 0;
237
238         foreach_out_edge(irn, edge) {
239                 ir_node *n = edge->src;
240
241                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
242                         arch_register_req_t req;
243                         arch_get_register_req(co->aenv, &req, n, -1);
244
245                         if(is_Reg_Phi(n) ||
246                            is_Perm(co->aenv, n) ||
247                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
248                           )
249                                 return 1;
250                 }
251         }
252
253         return 0;
254 }
255
256 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
257         int cost = 0;
258         ir_loop *loop;
259         ir_node *root_block = get_nodes_block(root);
260
261         if (is_Phi(root)) {
262                 /* for phis the copies are placed in the corresponding pred-block */
263                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
264         } else {
265                 /* a perm places the copy in the same block as it resides */
266                 loop = get_irn_loop(root_block);
267         }
268         if (loop) {
269                 int d = get_loop_depth(loop);
270                 cost = d*d;
271         }
272         return 1+cost;
273 }
274
275 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
276         int res;
277         ir_node *root_bl = get_nodes_block(root);
278         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
279         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
280
281         /* don't allow values smaller than one. */
282         return res < 1 ? 1 : res;
283 }
284
285
286 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
287         return 1;
288 }
289
290 /******************************************************************************
291    ____        _   _    _       _ _          _____ _
292   / __ \      | | | |  | |     (_) |        / ____| |
293  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
294  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
295  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
296   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
297         | |                                                            __/ |
298         |_|                                                           |___/
299  ******************************************************************************/
300
301 /**
302  * Determines a maximum weighted independent set with respect to
303  * the interference and conflict edges of all nodes in a qnode.
304  */
305 static int ou_max_ind_set_costs(unit_t *ou) {
306         be_chordal_env_t *chordal_env = ou->co->cenv;
307         ir_node **safe, **unsafe;
308         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
309         bitset_t *curr;
310         int max, pos, curr_weight, best_weight = 0;
311
312         /* assign the nodes into two groups.
313          * safe: node has no interference, hence it is in every max stable set.
314          * unsafe: node has an interference
315          */
316         safe = alloca((ou->node_count-1) * sizeof(*safe));
317         safe_costs = 0;
318         safe_count = 0;
319         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
320         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
321         unsafe_count = 0;
322         for(i=1; i<ou->node_count; ++i) {
323                 int is_safe = 1;
324                 for(o=1; o<ou->node_count; ++o) {
325                         if (i==o)
326                                 continue;
327                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
328                                 unsafe_costs[unsafe_count] = ou->costs[i];
329                                 unsafe[unsafe_count] = ou->nodes[i];
330                                 ++unsafe_count;
331                                 is_safe = 0;
332                                 break;
333                         }
334                 }
335                 if (is_safe) {
336                         safe_costs += ou->costs[i];
337                         safe[safe_count++] = ou->nodes[i];
338                 }
339         }
340
341
342         /* now compute the best set out of the unsafe nodes*/
343         if (unsafe_count > MIS_HEUR_TRIGGER) {
344                 bitset_t *best = bitset_alloca(unsafe_count);
345                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
346                 for (i=0; i<unsafe_count; ++i) {
347                         bitset_set(best, i);
348                         /* check if it is a stable set */
349                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
350                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
351                                         bitset_clear(best, i); /* clear the bit and try next one */
352                                         break;
353                                 }
354                 }
355                 /* compute the weight */
356                 bitset_foreach(best, pos)
357                         best_weight += unsafe_costs[pos];
358         } else {
359                 /* Exact Algorithm: Brute force */
360                 curr = bitset_alloca(unsafe_count);
361                 bitset_set_all(curr);
362                 while ((max = bitset_popcnt(curr)) != 0) {
363                         /* check if curr is a stable set */
364                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
365                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
366                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
367                                                         goto no_stable_set;
368
369                         /* if we arrive here, we have a stable set */
370                         /* compute the weigth of the stable set*/
371                         curr_weight = 0;
372                         bitset_foreach(curr, pos)
373                                 curr_weight += unsafe_costs[pos];
374
375                         /* any better ? */
376                         if (curr_weight > best_weight) {
377                                 best_weight = curr_weight;
378                         }
379
380         no_stable_set:
381                         bitset_minus1(curr);
382                 }
383         }
384
385         return safe_costs+best_weight;
386 }
387
388 static void co_collect_units(ir_node *irn, void *env) {
389         copy_opt_t *co = env;
390         unit_t *unit;
391         arch_register_req_t req;
392
393         if (!is_curr_reg_class(co, irn))
394                 return;
395         if (!co_is_optimizable_root(co, irn))
396                 return;
397
398         /* Init a new unit */
399         unit = xcalloc(1, sizeof(*unit));
400         unit->co = co;
401         unit->node_count = 1;
402         INIT_LIST_HEAD(&unit->queue);
403
404         /* Phi with some/all of its arguments */
405         if (is_Reg_Phi(irn)) {
406                 int i, arity;
407
408                 /* init */
409                 arity = get_irn_arity(irn);
410                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
411                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
412                 unit->nodes[0] = irn;
413
414                 /* fill */
415                 for (i=0; i<arity; ++i) {
416                         int o, arg_pos;
417                         ir_node *arg = get_irn_n(irn, i);
418
419                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
420                         if (arg == irn)
421                                 continue;
422                         if (nodes_interfere(co->cenv, irn, arg)) {
423                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
424                                 continue;
425                         }
426
427                         /* Else insert the argument of the phi to the members of this ou */
428                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
429
430                         /* Check if arg has occurred at a prior position in the arg/list */
431                         arg_pos = 0;
432                         for (o=0; o<unit->node_count; ++o)
433                                 if (unit->nodes[o] == arg) {
434                                         arg_pos = o;
435                                         break;
436                                 }
437
438                         if (!arg_pos) { /* a new argument */
439                                 /* insert node, set costs */
440                                 unit->nodes[unit->node_count] = arg;
441                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
442                                 unit->node_count++;
443                         } else { /* arg has occured before in same phi */
444                                 /* increase costs for existing arg */
445                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
446                         }
447                 }
448                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
449                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
450         } else
451
452         /* Proj of a perm with corresponding arg */
453         if (is_Perm_Proj(co->aenv, irn)) {
454                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
455                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
456                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
457                 unit->node_count = 2;
458                 unit->nodes[0] = irn;
459                 unit->nodes[1] = get_Perm_src(irn);
460                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
461         } else
462
463         /* Src == Tgt of a 2-addr-code instruction */
464         if (is_2addr_code(co->aenv, irn, &req)) {
465                 ir_node *other = req.other_same;
466                 if (!nodes_interfere(co->cenv, irn, other)) {
467                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
468                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
469                         unit->node_count = 2;
470                         unit->nodes[0] = irn;
471                         unit->nodes[1] = other;
472                         unit->costs[1] = co->get_costs(co, irn, other, -1);
473                 }
474         } else
475                 assert(0 && "This is not an optimizable node!");
476
477         /* Insert the new unit at a position according to its costs */
478         if (unit->node_count > 1) {
479                 int i;
480                 struct list_head *tmp;
481
482                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
483                 for(i=1; i<unit->node_count; ++i) {
484                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
485                         unit->all_nodes_costs += unit->costs[i];
486                 }
487
488                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
489                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
490                 /* Insert the new ou according to its sort_key */
491                 tmp = &co->units;
492                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
493                         tmp = tmp->next;
494                 list_add(&unit->units, tmp);
495         } else {
496                 free(unit);
497         }
498 }
499
500 #ifdef QUICK_AND_DIRTY_HACK
501
502 static int compare_ous(const void *k1, const void *k2) {
503         const unit_t *u1 = *((const unit_t **) k1);
504         const unit_t *u2 = *((const unit_t **) k2);
505         int i, o, u1_has_constr, u2_has_constr;
506         arch_register_req_t req;
507         const arch_env_t *aenv = u1->co->aenv;
508
509         /* Units with constraints come first */
510         u1_has_constr = 0;
511         for (i=0; i<u1->node_count; ++i) {
512                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
513                 if (arch_register_req_is(&req, limited)) {
514                         u1_has_constr = 1;
515                         break;
516                 }
517         }
518
519         u2_has_constr = 0;
520         for (i=0; i<u2->node_count; ++i) {
521                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
522                 if (arch_register_req_is(&req, limited)) {
523                         u2_has_constr = 1;
524                         break;
525                 }
526         }
527
528         if (u1_has_constr != u2_has_constr)
529                 return u2_has_constr - u1_has_constr;
530
531         /* Now check, whether the two units are connected */
532 #if 0
533         for (i=0; i<u1->node_count; ++i)
534                 for (o=0; o<u2->node_count; ++o)
535                         if (u1->nodes[i] == u2->nodes[o])
536                                 return 0;
537 #endif
538
539         /* After all, the sort key decides. Greater keys come first. */
540         return u2->sort_key - u1->sort_key;
541
542 }
543
544 /**
545  * Sort the ou's according to constraints and their sort_key
546  */
547 static void co_sort_units(copy_opt_t *co) {
548         int i, count = 0, costs;
549         unit_t *ou, **ous;
550
551         /* get the number of ous, remove them form the list and fill the array */
552         list_for_each_entry(unit_t, ou, &co->units, units)
553                 count++;
554         ous = alloca(count * sizeof(*ous));
555
556         costs = co_get_max_copy_costs(co);
557
558         i = 0;
559         list_for_each_entry(unit_t, ou, &co->units, units)
560                 ous[i++] = ou;
561
562         INIT_LIST_HEAD(&co->units);
563
564         assert(count == i && list_empty(&co->units));
565
566         for (i=0; i<count; ++i)
567                 ir_printf("%+F\n", ous[i]->nodes[0]);
568
569         qsort(ous, count, sizeof(*ous), compare_ous);
570
571         ir_printf("\n\n");
572         for (i=0; i<count; ++i)
573                 ir_printf("%+F\n", ous[i]->nodes[0]);
574
575         /* reinsert into list in correct order */
576         for (i=0; i<count; ++i)
577                 list_add_tail(&ous[i]->units, &co->units);
578
579         assert(costs == co_get_max_copy_costs(co));
580 }
581 #endif
582
583 void co_build_ou_structure(copy_opt_t *co) {
584         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
585         INIT_LIST_HEAD(&co->units);
586         irg_walk_graph(co->irg, co_collect_units, NULL, co);
587 #ifdef QUICK_AND_DIRTY_HACK
588         co_sort_units(co);
589 #endif
590 }
591
592 void co_free_ou_structure(copy_opt_t *co) {
593         unit_t *curr, *tmp;
594         ASSERT_OU_AVAIL(co);
595         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
596                 xfree(curr->nodes);
597                 xfree(curr->costs);
598                 xfree(curr);
599         }
600         co->units.next = NULL;
601 }
602
603 /* co_solve_heuristic() is implemented in becopyheur.c */
604
605 int co_get_max_copy_costs(const copy_opt_t *co) {
606         int i, res = 0;
607         unit_t *curr;
608
609         ASSERT_OU_AVAIL(co);
610
611         list_for_each_entry(unit_t, curr, &co->units, units) {
612                 res += curr->inevitable_costs;
613                 for (i=1; i<curr->node_count; ++i)
614                         res += curr->costs[i];
615         }
616         return res;
617 }
618
619 int co_get_inevit_copy_costs(const copy_opt_t *co) {
620         int res = 0;
621         unit_t *curr;
622
623         ASSERT_OU_AVAIL(co);
624
625         list_for_each_entry(unit_t, curr, &co->units, units)
626                 res += curr->inevitable_costs;
627         return res;
628 }
629
630 int co_get_copy_costs(const copy_opt_t *co) {
631         int i, res = 0;
632         unit_t *curr;
633
634         ASSERT_OU_AVAIL(co);
635
636         list_for_each_entry(unit_t, curr, &co->units, units) {
637                 int root_col = get_irn_col(co, curr->nodes[0]);
638                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
639                 res += curr->inevitable_costs;
640                 for (i=1; i<curr->node_count; ++i) {
641                         int arg_col = get_irn_col(co, curr->nodes[i]);
642                         if (root_col != arg_col) {
643                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
644                                 res += curr->costs[i];
645                         }
646                 }
647         }
648         return res;
649 }
650
651 int co_get_lower_bound(const copy_opt_t *co) {
652         int res = 0;
653         unit_t *curr;
654
655         ASSERT_OU_AVAIL(co);
656
657         list_for_each_entry(unit_t, curr, &co->units, units)
658                 res += curr->inevitable_costs + curr->min_nodes_costs;
659         return res;
660 }
661
662 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
663 {
664         bitset_t *seen = bitset_irg_malloc(co->irg);
665         affinity_node_t *an;
666
667         memset(stat, 0, sizeof(stat[0]));
668
669         /* count affinity edges. */
670         co_gs_foreach_aff_node(co, an) {
671                 neighb_t *neigh;
672                 stat->aff_nodes += 1;
673                 bitset_add_irn(seen, an->irn);
674                 co_gs_foreach_neighb(an, neigh) {
675                         if(!bitset_contains_irn(seen, neigh->irn)) {
676                                 stat->aff_edges += 1;
677                                 stat->max_costs += neigh->costs;
678
679                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
680                                         stat->costs += neigh->costs;
681                                         stat->unsatisfied_edges += 1;
682                                 }
683
684                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
685                                         stat->aff_int += 1;
686                                         stat->inevit_costs += neigh->costs;
687                                 }
688
689                         }
690                 }
691         }
692
693         bitset_free(seen);
694 }
695
696 /******************************************************************************
697    _____                 _        _____ _
698   / ____|               | |      / ____| |
699  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
700  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
701  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
702   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
703                   | |                                       __/ |
704                   |_|                                      |___/
705  ******************************************************************************/
706
707 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
708         const affinity_node_t *n1 = k1;
709         const affinity_node_t *n2 = k2;
710
711         return (n1->irn != n2->irn);
712 }
713
714 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
715         affinity_node_t new_node, *node;
716         neighb_t new_nbr, *nbr;
717         int allocnew;
718
719         new_node.irn        = n1;
720         new_node.degree     = 0;
721         new_node.neighbours = NULL;
722         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
723
724         allocnew = 1;
725         for (nbr = node->neighbours; nbr; nbr = nbr->next)
726                 if (nbr->irn == n2) {
727                         allocnew = 0;
728                         break;
729                 }
730
731         /* if we did not find n2 in n1's neighbourhood insert it */
732         if (allocnew) {
733                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
734                 nbr = obstack_finish(&co->obst);
735                 nbr->irn   = n2;
736                 nbr->costs = 0;
737                 nbr->next  = node->neighbours;
738                 node->neighbours = nbr;
739                 node->degree++;
740         }
741
742         /* now nbr points to n1's neighbour-entry of n2 */
743         nbr->costs += costs;
744 }
745
746 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
747         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
748                 add_edge(co, n1, n2, costs);
749                 add_edge(co, n2, n1, costs);
750         }
751 }
752
753 static void build_graph_walker(ir_node *irn, void *env) {
754         copy_opt_t *co = env;
755         int pos, max;
756         arch_register_req_t req;
757         const arch_register_t *reg;
758
759         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
760                 return;
761
762         reg = arch_get_irn_register(co->aenv, irn);
763         if (arch_register_type_is(reg, ignore))
764                 return;
765
766         /* Phis */
767         if (is_Reg_Phi(irn))
768                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
769                         ir_node *arg = get_irn_n(irn, pos);
770                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
771                 }
772
773         /* Perms */
774         else if (is_Perm_Proj(co->aenv, irn)) {
775                 ir_node *arg = get_Perm_src(irn);
776                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
777         }
778
779         /* 2-address code */
780         else if (is_2addr_code(co->aenv, irn, &req))
781                 add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0));
782 }
783
784 void co_build_graph_structure(copy_opt_t *co) {
785         obstack_init(&co->obst);
786         co->nodes = new_set(compare_affinity_node_t, 32);
787
788         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
789 }
790
791 void co_free_graph_structure(copy_opt_t *co) {
792         ASSERT_GS_AVAIL(co);
793
794         del_set(co->nodes);
795         obstack_free(&co->obst, NULL);
796         co->nodes = NULL;
797 }
798
799 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
800
801 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
802         affinity_node_t new_node, *n;
803
804         ASSERT_GS_AVAIL(co);
805
806         new_node.irn = irn;
807         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
808         if (n) {
809                 return (n->degree > 0);
810         } else
811                 return 0;
812 }
813
814 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
815 {
816         be_ifg_t *ifg  = co->cenv->ifg;
817         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
818         bitset_t *adm  = bitset_alloca(co->cls->n_regs);
819
820         ir_node *irn;
821         void *it, *nit;
822         int i, n, n_regs;
823
824         n_regs = 0;
825         for(i = 0; i < co->cls->n_regs; ++i) {
826                 const arch_register_t *reg = &co->cls->regs[i];
827                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
828         }
829
830         /*
831          * n contains the first node number.
832          * the values below n are the pre-colored register nodes
833          */
834
835         it  = be_ifg_nodes_iter_alloca(ifg);
836         nit = be_ifg_neighbours_iter_alloca(ifg);
837
838         n = n_regs;
839         be_ifg_foreach_node(ifg, it, irn) {
840                 if(!arch_irn_is(co->aenv, irn, ignore))
841                         set_irn_link(irn, INT_TO_PTR(n++));
842         }
843
844         fprintf(f, "%d %d\n", n, n_regs);
845
846         be_ifg_foreach_node(ifg, it, irn) {
847                 if(!arch_irn_is(co->aenv, irn, ignore)) {
848                         int idx            = PTR_TO_INT(get_irn_link(irn));
849                         affinity_node_t *a = get_affinity_info(co, irn);
850
851                         arch_register_req_t req;
852                         ir_node *adj;
853
854                         arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0));
855                         if(arch_register_req_is(&req, limited)) {
856                                 bitset_clear_all(adm);
857                                 req.limited(req.limited_env, adm);
858                                 for(i = 0; i < co->cls->n_regs; ++i)
859                                         if(!bitset_is_set(adm, i) && color_map[i] >= 0)
860                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
861
862                         }
863
864
865                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
866                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
867                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
868                                         if(idx < adj_idx)
869                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
870                                 }
871                         }
872
873                         if(a) {
874                                 neighb_t *n;
875
876                                 co_gs_foreach_neighb(a, n) {
877                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
878                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
879                                                 if(idx < n_idx)
880                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
881                                         }
882                                 }
883                         }
884                 }
885         }
886 }
887
888 typedef struct _appel_clique_walker_t {
889         phase_t ph;
890         const copy_opt_t *co;
891         int curr_nr;
892         int node_count;
893         FILE *f;
894         int dumb;
895         int *color_map;
896         struct obstack obst;
897 } appel_clique_walker_t;
898
899 typedef struct _appel_block_info_t {
900         int *live_end_nr;
901         int *live_in_nr;
902         int *phi_nr;
903         ir_node **live_end;
904         ir_node **live_in;
905         ir_node **phi;
906         int n_live_end;
907         int n_live_in;
908         int n_phi;
909 } appel_block_info_t;
910
911 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
912 {
913 #if 0
914         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
915         int res = (int) freq;
916         return res == 0 ? 1 : res;
917 #else
918         ir_loop *loop = get_irn_loop(bl);
919         if(loop) {
920                 int d = get_loop_depth(loop);
921                 return 1 + d * d;
922         }
923         return 1;
924 #endif
925 }
926
927 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
928 {
929         appel_block_info_t *res = NULL;
930
931         if(is_Block(irn)) {
932                 appel_clique_walker_t *d = (void *) phase;
933                 res = phase_alloc(phase, sizeof(res[0]));
934                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
935                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
936                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
937                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
938                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
939                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
940         }
941
942         return res;
943 }
944
945 typedef struct _insn_list_t {
946         be_insn_t *insn;
947         struct list_head list;
948 } insn_list_t;
949
950 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
951 {
952         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
953         int i;
954
955         for(i = 0; i < bli->n_live_end; ++i)
956                 if(bli->live_end[i] == irn)
957                         return bli->live_end_nr[i];
958
959         return -1;
960 }
961
962 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
963 {
964         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
965         ir_node *irn;
966         int n_live;
967         int j;
968
969         n_live = 0;
970         foreach_pset(live, irn)
971                 live_arr[n_live++] = irn;
972
973         /* dump the live after clique */
974         if(!env->dumb) {
975                 for(j = 0; j < n_live; ++j) {
976                         int k;
977
978                         for(k = j + 1; k < n_live; ++k) {
979                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
980                         }
981                         fprintf(env->f, "\n");
982                 }
983         }
984
985         /* dump the affinities */
986         for(j = 0; !env->dumb && j < n_live; ++j) {
987                 ir_node *irn = live_arr[j];
988                 int old_nr = PTR_TO_INT(get_irn_link(irn));
989
990                 /* if the node was already live in the last insn dump the affinity */
991                 if(old_nr > start_nr) {
992                         int weight = appel_aff_weight(env, bl);
993                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
994                 }
995         }
996
997         /* set the current numbers into the link field. */
998         for(j = 0; j < n_live; ++j) {
999                 ir_node *irn = live_arr[j];
1000                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1001         }
1002
1003         return curr_nr + n_live;
1004 }
1005
1006 static void appel_walker(ir_node *bl, void *data)
1007 {
1008         appel_clique_walker_t *env = data;
1009         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1010         struct obstack *obst       = &env->obst;
1011         void *base                 = obstack_base(obst);
1012         pset *live                 = pset_new_ptr_default();
1013         be_lv_t *lv                = env->co->cenv->birg->lv;
1014
1015         int n_insns  = 0;
1016         int n_nodes  = 0;
1017         int start_nr = env->curr_nr;
1018         int curr_nr  = start_nr;
1019
1020         be_insn_env_t insn_env;
1021         int i, j;
1022         ir_node *irn;
1023         be_insn_t **insns;
1024
1025         insn_env.aenv = env->co->aenv;
1026         insn_env.cls  = env->co->cls;
1027         insn_env.obst = obst;
1028         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1029
1030         /* Guess how many insns will be in this block. */
1031         sched_foreach(bl, irn)
1032                 n_nodes++;
1033
1034         bli->n_phi = 0;
1035         insns = malloc(n_nodes * sizeof(insns[0]));
1036
1037         /* Put all insns in an array. */
1038         irn = sched_first(bl);
1039         while(!sched_is_end(irn)) {
1040                 be_insn_t *insn;
1041                 insn = be_scan_insn(&insn_env, irn);
1042                 insns[n_insns++] = insn;
1043                 irn = insn->next_insn;
1044         }
1045
1046         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
1047         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1048
1049         /* Generate the bad and ugly. */
1050         for(i = n_insns - 1; i >= 0; --i) {
1051                 be_insn_t *insn = insns[i];
1052
1053                 /* The first live set has to be saved in the block border set. */
1054                 if(i == n_insns - 1) {
1055                         j = 0;
1056                         foreach_pset(live, irn) {
1057                                 bli->live_end[j]    = irn;
1058                                 bli->live_end_nr[j] = curr_nr + j;
1059                                 ++j;
1060                         }
1061                         bli->n_live_end = j;
1062                 }
1063
1064                 if(!env->dumb) {
1065                         for(j = 0; j < insn->use_start; ++j) {
1066                                 ir_node *op   = insn->ops[j].carrier;
1067                                 bitset_t *adm = insn->ops[j].regs;
1068                                 int k;
1069                                 int nr;
1070
1071                                 if(!insn->ops[j].has_constraints)
1072                                         continue;
1073
1074                                 nr = 0;
1075                                 foreach_pset(live, irn) {
1076                                         if(irn == op) {
1077                                                 pset_break(live);
1078                                                 break;
1079                                         }
1080                                         ++nr;
1081                                 }
1082
1083                                 assert(nr < pset_count(live));
1084
1085                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1086                                         int mapped_col = env->color_map[k];
1087                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1088                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1089                                 }
1090                         }
1091                 }
1092
1093                 /* dump the clique and update the stuff. */
1094                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1095
1096                 /* remove all defs. */
1097                 for(j = 0; j < insn->use_start; ++j)
1098                         pset_remove_ptr(live, insn->ops[j].carrier);
1099
1100                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1101                         bli->phi[bli->n_phi]    = insn->irn;
1102                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1103                         bli->n_phi++;
1104                 }
1105
1106                 /* add all uses */
1107                 else
1108                         for(j = insn->use_start; j < insn->n_ops; ++j)
1109                                 pset_insert_ptr(live, insn->ops[j].carrier);
1110         }
1111
1112         /* print the start clique. */
1113         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1114
1115         i = 0;
1116         foreach_pset(live, irn) {
1117                 bli->live_in[i]    = irn;
1118                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1119                 ++i;
1120         }
1121         bli->n_live_in = i;
1122
1123         del_pset(live);
1124         free(insns);
1125         obstack_free(obst, base);
1126         env->curr_nr = curr_nr;
1127 }
1128
1129 static void appel_inter_block_aff(ir_node *bl, void *data)
1130 {
1131         appel_clique_walker_t *env = data;
1132         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1133
1134         int i, j, n;
1135
1136         for(i = 0; i < bli->n_live_in; ++i) {
1137                 ir_node *irn = bli->live_in[i];
1138
1139                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1140                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1141
1142                         int nr = appel_get_live_end_nr(env, pred, irn);
1143                         assert(nr >= 0);
1144                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1145                 }
1146         }
1147
1148         for(i = 0; i < bli->n_phi; ++i) {
1149                 ir_node *irn = bli->phi[i];
1150
1151                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1152                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1153                         ir_node *op = get_irn_n(irn, j);
1154
1155                         int nr = appel_get_live_end_nr(env, pred, op);
1156                         assert(nr >= 0);
1157                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1158                 }
1159         }
1160
1161 }
1162
1163 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1164 {
1165         int i;
1166         int n_colors;
1167         appel_clique_walker_t env;
1168         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1169         be_lv_t *lv = co->cenv->birg->lv;
1170
1171         be_liveness_recompute(lv);
1172         obstack_init(&env.obst);
1173         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1174         env.curr_nr = co->cls->n_regs;
1175         env.co = co;
1176         env.f = f;
1177
1178         bitset_copy(adm, co->cenv->ignore_colors);
1179         bitset_flip_all(adm);
1180
1181         /* Make color map. */
1182         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1183         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1184                 const arch_register_t *reg = &co->cls->regs[i];
1185                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1186         }
1187
1188         env.dumb = 1;
1189         env.curr_nr = n_colors;
1190         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1191         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1192
1193         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1194
1195         /* make the first k nodes interfere */
1196         for(i = 0; i < n_colors; ++i) {
1197                 int j;
1198                 for(j = i + 1; j < n_colors; ++j)
1199                         fprintf(f, "%d %d -1 ", i, j);
1200                 fprintf(f, "\n");
1201         }
1202
1203         env.dumb = 0;
1204         env.curr_nr = n_colors;
1205         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1206         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1207         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1208         obstack_free(&env.obst, NULL);
1209 }
1210
1211 /*
1212          ___ _____ ____   ____   ___ _____   ____                        _
1213         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1214          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1215          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1216         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1217                                                                   |_|            |___/
1218 */
1219
1220 static const char *get_dot_color_name(int col)
1221 {
1222         static const char *names[] = {
1223                 "blue",
1224                 "red",
1225                 "green",
1226                 "yellow",
1227                 "cyan",
1228                 "magenta",
1229                 "orange",
1230                 "chocolate",
1231                 "beige",
1232                 "navy",
1233                 "darkgreen",
1234                 "darkred",
1235                 "lightPink",
1236                 "chartreuse",
1237                 "lightskyblue",
1238                 "linen",
1239                 "pink",
1240                 "lightslateblue",
1241                 "mintcream",
1242                 "red",
1243                 "darkolivegreen",
1244                 "mediumblue",
1245                 "mistyrose",
1246                 "salmon",
1247                 "darkseagreen",
1248                 "mediumslateblue"
1249                 "moccasin",
1250                 "tomato",
1251                 "forestgreen",
1252                 "darkturquoise",
1253                 "palevioletred"
1254         };
1255
1256         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1257 }
1258
1259 typedef struct _co_ifg_dump_t {
1260         const copy_opt_t *co;
1261         unsigned flags;
1262 } co_ifg_dump_t;
1263
1264 static void ifg_dump_graph_attr(FILE *f, void *self)
1265 {
1266         fprintf(f, "overlap=scale");
1267 }
1268
1269 static int ifg_is_dump_node(void *self, ir_node *irn)
1270 {
1271         co_ifg_dump_t *cod = self;
1272         return !arch_irn_is(cod->co->aenv, irn, ignore);
1273 }
1274
1275 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1276 {
1277         co_ifg_dump_t *env         = self;
1278         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1279         arch_register_req_t req;
1280         int limited;
1281
1282         arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0));
1283         limited = arch_register_req_is(&req, limited);
1284
1285         if(env->flags & CO_IFG_DUMP_LABELS) {
1286                 ir_fprintf(f, "label=\"%+F", irn);
1287
1288                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1289                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1290                         req.limited(req.limited_env, bs);
1291                         ir_fprintf(f, "\\n%B", bs);
1292                 }
1293                 ir_fprintf(f, "\" ");
1294         }
1295
1296         else
1297                 fprintf(f, "label=\"\" shape=point " );
1298
1299         if(env->flags & CO_IFG_DUMP_SHAPE)
1300                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1301
1302         if(env->flags & CO_IFG_DUMP_COLORS)
1303                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1304 }
1305
1306 static void ifg_dump_at_end(FILE *file, void *self)
1307 {
1308         co_ifg_dump_t *env = self;
1309         affinity_node_t *a;
1310
1311         co_gs_foreach_aff_node(env->co, a) {
1312                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1313                 unsigned aidx = get_irn_idx(a->irn);
1314                 neighb_t *n;
1315
1316                 co_gs_foreach_neighb(a, n) {
1317                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1318                         unsigned nidx = get_irn_idx(n->irn);
1319
1320                         if(aidx < nidx) {
1321                                 const char *color = nr == ar ? "blue" : "red";
1322                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1323                                 if(env->flags & CO_IFG_DUMP_LABELS)
1324                                         fprintf(file, "label=\"%d\" ", n->costs);
1325                                 if(env->flags & CO_IFG_DUMP_COLORS)
1326                                         fprintf(file, "color=%s ", color);
1327                                 else
1328                                         fprintf(file, "style=dotted");
1329                                 fprintf(file, "];\n");
1330                         }
1331                 }
1332         }
1333 }
1334
1335
1336 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1337         ifg_is_dump_node,
1338         ifg_dump_graph_attr,
1339         ifg_dump_node_attr,
1340         NULL,
1341         NULL,
1342         ifg_dump_at_end
1343 };
1344
1345
1346
1347 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1348 {
1349         co_ifg_dump_t cod;
1350
1351         cod.co    = co;
1352         cod.flags = flags;
1353         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1354 }
1355
1356
1357 void co_solve_park_moon(copy_opt_t *opt)
1358 {
1359
1360 }
1361
1362 static int void_algo(copy_opt_t *co)
1363 {
1364         return 0;
1365 }
1366
1367 /*
1368                 _    _                  _ _   _
1369            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1370           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1371          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1372         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1373                            |___/
1374 */
1375
1376 typedef struct {
1377         co_algo_t *algo;
1378         const char *name;
1379         int can_improve_existing;
1380 } co_algo_info_t;
1381
1382 static co_algo_info_t algos[] = {
1383         { void_algo,                "none",  0 },
1384         { co_solve_heuristic,       "heur1", 0 },
1385         { co_solve_heuristic_new,   "heur2", 0 },
1386         { co_solve_heuristic_java,  "heur3", 0 },
1387 #ifdef WITH_ILP
1388         { co_solve_ilp2,            "ilp",   1 },
1389 #endif
1390         { NULL,                     "",      0 }
1391 };
1392
1393 /*
1394     __  __       _         ____       _
1395    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1396    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1397    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1398    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1399
1400 */
1401
1402 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1403 {
1404         FILE *result;
1405         char buf[1024];
1406
1407         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1408         result = fopen(buf, "wt");
1409         if(result == NULL) {
1410                 panic("Couldn't open '%s' for writing.", buf);
1411         }
1412
1413         return result;
1414 }
1415
1416 void co_driver(be_chordal_env_t *cenv)
1417 {
1418 #ifdef WITH_LIBCORE
1419         lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
1420 #endif
1421         co_complete_stats_t before, after;
1422         copy_opt_t *co;
1423         co_algo_t  *algo_func;
1424         int was_optimal = 0;
1425
1426         if(algo < 0 || algo >= CO_ALGO_LAST)
1427                 return;
1428
1429         co = new_copy_opt(cenv, cost_func);
1430         co_build_ou_structure(co);
1431         co_build_graph_structure(co);
1432
1433         co_complete_stats(co, &before);
1434
1435         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1436         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1437         be_stat_ev_ull("co_max_costs",    before.max_costs);
1438         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1439         be_stat_ev_ull("co_aff_int",      before.aff_int);
1440
1441         be_stat_ev_ull("co_init_costs",   before.costs);
1442         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1443
1444         /* Dump the interference graph in Appel's format. */
1445         if(dump_flags & DUMP_APPEL) {
1446                 FILE *f = my_open(cenv, "", ".apl");
1447                 co_dump_appel_graph(co, f);
1448                 fclose(f);
1449         }
1450
1451         if(dump_flags & DUMP_BEFORE) {
1452                 FILE *f = my_open(cenv, "", "-before.dot");
1453                 co_dump_ifg_dot(co, f, style_flags);
1454                 fclose(f);
1455         }
1456
1457         /* if the algo can improve results, provide an initial solution with heur3 */
1458         if(improve && algos[algo].can_improve_existing) {
1459                 co_complete_stats_t stats;
1460
1461                 /* produce a heuristical solution */
1462                 co_solve_heuristic_java(co);
1463
1464                 /* do the stats and provide the current costs */
1465                 co_complete_stats(co, &stats);
1466                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1467         }
1468
1469 #ifdef WITH_JVM
1470         /* start the JVM here so that it does not tamper the timing. */
1471         if(algo == CO_ALGO_HEUR3)
1472                 be_java_coal_start_jvm();
1473 #endif
1474
1475         algo_func = algos[algo].algo;
1476
1477 #ifdef WITH_LIBCORE
1478         lc_timer_reset_and_start(timer);
1479 #endif
1480
1481         was_optimal = algo_func(co);
1482
1483 #ifdef WITH_LIBCORE
1484         lc_timer_stop(timer);
1485         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1486 #endif
1487
1488         be_stat_ev_ull("co_optimal", was_optimal);
1489
1490         if(dump_flags & DUMP_AFTER) {
1491                 FILE *f = my_open(cenv, "", "-after.dot");
1492                 co_dump_ifg_dot(co, f, style_flags);
1493                 fclose(f);
1494         }
1495
1496         co_complete_stats(co, &after);
1497
1498         if(do_stats) {
1499                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1500                 ulong64 evitable          = after.costs     - after.inevit_costs;
1501
1502                 ir_printf("%30F ", cenv->irg);
1503                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1504
1505                 if(optimizable_costs > 0)
1506                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1507                 else
1508                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1509         }
1510
1511         be_stat_ev_ull("co_after_costs", after.costs);
1512         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1513
1514         co_free_graph_structure(co);
1515         co_free_ou_structure(co);
1516         free_copy_opt(co);
1517 }