582c4e2cf1324495eacd271437f0f55ab7072324
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #ifdef HAVE_ALLOCA_H
11 #include <alloca.h>
12 #endif
13 #ifdef HAVE_MALLOC_H
14 #include <malloc.h>
15 #endif
16
17 #include "execfreq.h"
18 #include "xmalloc.h"
19 #include "debug.h"
20 #include "pmap.h"
21 #include "irgraph.h"
22 #include "irgwalk.h"
23 #include "irprog.h"
24 #include "irloop_t.h"
25 #include "iredges_t.h"
26 #include "phiclass.h"
27 #include "irbitset.h"
28 #include "irphase_t.h"
29 #include "irprintf_t.h"
30
31
32 #include "bearch.h"
33 #include "benode_t.h"
34 #include "beutil.h"
35 #include "beifg_t.h"
36 #include "becopyopt_t.h"
37 #include "becopystat.h"
38 #include "belive_t.h"
39 #include "beinsn_t.h"
40 #include "besched_t.h"
41 #include "benodesets.h"
42 #include "bejavacoal.h"
43 #include "bestatevent.h"
44
45 #ifdef WITH_LIBCORE
46 #include <libcore/lc_timing.h>
47 #include <libcore/lc_opts.h>
48 #endif /* WITH_LIBCORE */
49
50 #define DUMP_BEFORE 1
51 #define DUMP_AFTER  2
52 #define DUMP_APPEL  4
53 #define DUMP_ALL    2 * DUMP_APPEL - 1
54
55 #define COST_FUNC_FREQ     1
56 #define COST_FUNC_LOOP     2
57 #define COST_FUNC_ALL_ONE  3
58
59 static unsigned dump_flags    = 0;
60 static unsigned style_flags   = 0;
61 static unsigned  do_stats     = 0;
62 static cost_fct_t cost_func   = co_get_costs_exec_freq;
63 static unsigned algo          = CO_ALGO_HEUR2;
64 static int improve            = 1;
65
66 #ifdef WITH_LIBCORE
67 static const lc_opt_enum_mask_items_t dump_items[] = {
68         { "before",  DUMP_BEFORE },
69         { "after",   DUMP_AFTER  },
70         { "appel",   DUMP_APPEL  },
71         { "all",     DUMP_ALL    },
72         { NULL,      0 }
73 };
74
75 static const lc_opt_enum_mask_items_t style_items[] = {
76         { "color",   CO_IFG_DUMP_COLORS },
77         { "labels",  CO_IFG_DUMP_LABELS },
78         { "constr",  CO_IFG_DUMP_CONSTR },
79         { "shape",   CO_IFG_DUMP_SHAPE  },
80         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
81         { NULL,      0 }
82 };
83
84 static const lc_opt_enum_mask_items_t algo_items[] = {
85         { "none",   CO_ALGO_NONE  },
86         { "heur",   CO_ALGO_HEUR  },
87         { "heur2",  CO_ALGO_HEUR2 },
88 #ifdef WITH_JVM
89         { "heur3",  CO_ALGO_HEUR3 },
90 #endif /* WITH_JVM */
91 #ifdef WITH_ILP
92         { "ilp",    CO_ALGO_ILP   },
93 #endif /* WITH_ILP */
94         { NULL,     0 }
95 };
96
97 typedef int (*opt_funcptr)(void);
98
99 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
100         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
101         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
102         { "one",    (opt_funcptr) co_get_costs_all_one },
103         { NULL,     NULL }
104 };
105
106 static lc_opt_enum_mask_var_t dump_var = {
107         &dump_flags, dump_items
108 };
109
110 static lc_opt_enum_mask_var_t style_var = {
111         &style_flags, style_items
112 };
113
114 static lc_opt_enum_mask_var_t algo_var = {
115         &algo, algo_items
116 };
117
118 static lc_opt_enum_func_ptr_var_t cost_func_var = {
119         (opt_funcptr*) &cost_func, cost_func_items
120 };
121
122 static const lc_opt_table_entry_t options[] = {
123         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
124         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
125         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
126         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
127         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
128         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
129         { NULL }
130 };
131
132 /* Insert additional options registration functions here. */
133 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
134 extern void be_co2_register_options(lc_opt_entry_t *grp);
135 extern void be_co3_register_options(lc_opt_entry_t *grp);
136
137 void co_register_options(lc_opt_entry_t *grp)
138 {
139         lc_opt_entry_t *co_grp = lc_opt_get_grp(grp, "co");
140         lc_opt_add_table(co_grp, options);
141
142         be_co2_register_options(co_grp);
143         be_co3_register_options(co_grp);
144 #ifdef WITH_ILP
145         be_co_ilp_register_options(co_grp);
146 #endif
147 }
148 #endif
149
150
151 #undef QUICK_AND_DIRTY_HACK
152
153 /******************************************************************************
154     _____                           _
155    / ____|                         | |
156   | |  __  ___ _ __   ___ _ __ __ _| |
157   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
158   | |__| |  __/ | | |  __/ | | (_| | |
159    \_____|\___|_| |_|\___|_|  \__,_|_|
160
161  ******************************************************************************/
162
163 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
164
165
166 void be_copy_opt_init(void) {
167 }
168
169 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
170 {
171         const char *s1, *s2, *s3;
172         int len;
173         copy_opt_t *co;
174
175         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
176
177         co = xcalloc(1, sizeof(*co));
178         co->cenv      = chordal_env;
179         co->aenv      = chordal_env->birg->main_env->arch_env;
180         co->irg       = chordal_env->irg;
181         co->cls       = chordal_env->cls;
182         co->get_costs = get_costs;
183
184         s1 = get_irp_prog_name();
185         s2 = get_entity_name(get_irg_entity(co->irg));
186         s3 = chordal_env->cls->name;
187         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
188         co->name = xmalloc(len);
189         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
190
191         return co;
192 }
193
194 void free_copy_opt(copy_opt_t *co) {
195         xfree(co->name);
196         free(co);
197 }
198
199 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
200         arch_register_req_t req;
201         const arch_register_t *reg;
202
203         if (arch_irn_is(co->aenv, irn, ignore))
204                 return 0;
205
206         reg = arch_get_irn_register(co->aenv, irn);
207         if (arch_register_type_is(reg, ignore))
208                 return 0;
209
210         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req))
211                 return 1;
212
213         return 0;
214 }
215
216 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
217         const ir_edge_t *edge;
218         const arch_register_t *reg;
219
220         assert(0 && "Is buggy and obsolete. Do not use");
221
222         if (arch_irn_is(co->aenv, irn, ignore))
223                 return 0;
224
225         reg = arch_get_irn_register(co->aenv, irn);
226         if (arch_register_type_is(reg, ignore))
227                 return 0;
228
229         foreach_out_edge(irn, edge) {
230                 ir_node *n = edge->src;
231
232                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
233                         arch_register_req_t req;
234                         arch_get_register_req(co->aenv, &req, n, -1);
235
236                         if(is_Reg_Phi(n) ||
237                            is_Perm(co->aenv, n) ||
238                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
239                           )
240                                 return 1;
241                 }
242         }
243
244         return 0;
245 }
246
247 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
248         int cost = 0;
249         ir_loop *loop;
250         ir_node *root_block = get_nodes_block(root);
251
252         if (is_Phi(root)) {
253                 /* for phis the copies are placed in the corresponding pred-block */
254                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
255         } else {
256                 /* a perm places the copy in the same block as it resides */
257                 loop = get_irn_loop(root_block);
258         }
259         if (loop) {
260                 int d = get_loop_depth(loop);
261                 cost = d*d;
262         }
263         return 1+cost;
264 }
265
266 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
267         int res;
268         ir_node *root_bl = get_nodes_block(root);
269         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
270         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
271
272         /* don't allow values smaller than one. */
273         return res < 1 ? 1 : res;
274 }
275
276
277 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
278         return 1;
279 }
280
281 /******************************************************************************
282    ____        _   _    _       _ _          _____ _
283   / __ \      | | | |  | |     (_) |        / ____| |
284  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
285  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
286  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
287   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
288         | |                                                            __/ |
289         |_|                                                           |___/
290  ******************************************************************************/
291
292 /**
293  * Determines a maximum weighted independent set with respect to
294  * the interference and conflict edges of all nodes in a qnode.
295  */
296 static int ou_max_ind_set_costs(unit_t *ou) {
297         be_chordal_env_t *chordal_env = ou->co->cenv;
298         ir_node **safe, **unsafe;
299         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
300         bitset_t *curr;
301         int max, pos, curr_weight, best_weight = 0;
302
303         /* assign the nodes into two groups.
304          * safe: node has no interference, hence it is in every max stable set.
305          * unsafe: node has an interference
306          */
307         safe = alloca((ou->node_count-1) * sizeof(*safe));
308         safe_costs = 0;
309         safe_count = 0;
310         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
311         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
312         unsafe_count = 0;
313         for(i=1; i<ou->node_count; ++i) {
314                 int is_safe = 1;
315                 for(o=1; o<ou->node_count; ++o) {
316                         if (i==o)
317                                 continue;
318                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
319                                 unsafe_costs[unsafe_count] = ou->costs[i];
320                                 unsafe[unsafe_count] = ou->nodes[i];
321                                 ++unsafe_count;
322                                 is_safe = 0;
323                                 break;
324                         }
325                 }
326                 if (is_safe) {
327                         safe_costs += ou->costs[i];
328                         safe[safe_count++] = ou->nodes[i];
329                 }
330         }
331
332
333         /* now compute the best set out of the unsafe nodes*/
334         if (unsafe_count > MIS_HEUR_TRIGGER) {
335                 bitset_t *best = bitset_alloca(unsafe_count);
336                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
337                 for (i=0; i<unsafe_count; ++i) {
338                         bitset_set(best, i);
339                         /* check if it is a stable set */
340                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
341                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
342                                         bitset_clear(best, i); /* clear the bit and try next one */
343                                         break;
344                                 }
345                 }
346                 /* compute the weight */
347                 bitset_foreach(best, pos)
348                         best_weight += unsafe_costs[pos];
349         } else {
350                 /* Exact Algorithm: Brute force */
351                 curr = bitset_alloca(unsafe_count);
352                 bitset_set_all(curr);
353                 while ((max = bitset_popcnt(curr)) != 0) {
354                         /* check if curr is a stable set */
355                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
356                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
357                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
358                                                         goto no_stable_set;
359
360                         /* if we arrive here, we have a stable set */
361                         /* compute the weigth of the stable set*/
362                         curr_weight = 0;
363                         bitset_foreach(curr, pos)
364                                 curr_weight += unsafe_costs[pos];
365
366                         /* any better ? */
367                         if (curr_weight > best_weight) {
368                                 best_weight = curr_weight;
369                         }
370
371         no_stable_set:
372                         bitset_minus1(curr);
373                 }
374         }
375
376         return safe_costs+best_weight;
377 }
378
379 static void co_collect_units(ir_node *irn, void *env) {
380         copy_opt_t *co = env;
381         unit_t *unit;
382         arch_register_req_t req;
383
384         if (!is_curr_reg_class(co, irn))
385                 return;
386         if (!co_is_optimizable_root(co, irn))
387                 return;
388
389         /* Init a new unit */
390         unit = xcalloc(1, sizeof(*unit));
391         unit->co = co;
392         unit->node_count = 1;
393         INIT_LIST_HEAD(&unit->queue);
394
395         /* Phi with some/all of its arguments */
396         if (is_Reg_Phi(irn)) {
397                 int i, arity;
398
399                 /* init */
400                 arity = get_irn_arity(irn);
401                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
402                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
403                 unit->nodes[0] = irn;
404
405                 /* fill */
406                 for (i=0; i<arity; ++i) {
407                         int o, arg_pos;
408                         ir_node *arg = get_irn_n(irn, i);
409
410                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
411                         if (arg == irn)
412                                 continue;
413                         if (nodes_interfere(co->cenv, irn, arg)) {
414                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
415                                 continue;
416                         }
417
418                         /* Else insert the argument of the phi to the members of this ou */
419                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
420
421                         /* Check if arg has occurred at a prior position in the arg/list */
422                         arg_pos = 0;
423                         for (o=0; o<unit->node_count; ++o)
424                                 if (unit->nodes[o] == arg) {
425                                         arg_pos = o;
426                                         break;
427                                 }
428
429                         if (!arg_pos) { /* a new argument */
430                                 /* insert node, set costs */
431                                 unit->nodes[unit->node_count] = arg;
432                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
433                                 unit->node_count++;
434                         } else { /* arg has occured before in same phi */
435                                 /* increase costs for existing arg */
436                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
437                         }
438                 }
439                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
440                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
441         } else
442
443         /* Proj of a perm with corresponding arg */
444         if (is_Perm_Proj(co->aenv, irn)) {
445                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
446                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
447                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
448                 unit->node_count = 2;
449                 unit->nodes[0] = irn;
450                 unit->nodes[1] = get_Perm_src(irn);
451                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
452         } else
453
454         /* Src == Tgt of a 2-addr-code instruction */
455         if (is_2addr_code(co->aenv, irn, &req)) {
456                 ir_node *other = req.other_same;
457                 if (!nodes_interfere(co->cenv, irn, other)) {
458                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
459                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
460                         unit->node_count = 2;
461                         unit->nodes[0] = irn;
462                         unit->nodes[1] = other;
463                         unit->costs[1] = co->get_costs(co, irn, other, -1);
464                 }
465         } else
466                 assert(0 && "This is not an optimizable node!");
467
468         /* Insert the new unit at a position according to its costs */
469         if (unit->node_count > 1) {
470                 int i;
471                 struct list_head *tmp;
472
473                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
474                 for(i=1; i<unit->node_count; ++i) {
475                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
476                         unit->all_nodes_costs += unit->costs[i];
477                 }
478
479                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
480                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
481                 /* Insert the new ou according to its sort_key */
482                 tmp = &co->units;
483                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
484                         tmp = tmp->next;
485                 list_add(&unit->units, tmp);
486         } else {
487                 free(unit);
488         }
489 }
490
491 #ifdef QUICK_AND_DIRTY_HACK
492
493 static int compare_ous(const void *k1, const void *k2) {
494         const unit_t *u1 = *((const unit_t **) k1);
495         const unit_t *u2 = *((const unit_t **) k2);
496         int i, o, u1_has_constr, u2_has_constr;
497         arch_register_req_t req;
498         const arch_env_t *aenv = u1->co->aenv;
499
500         /* Units with constraints come first */
501         u1_has_constr = 0;
502         for (i=0; i<u1->node_count; ++i) {
503                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
504                 if (arch_register_req_is(&req, limited)) {
505                         u1_has_constr = 1;
506                         break;
507                 }
508         }
509
510         u2_has_constr = 0;
511         for (i=0; i<u2->node_count; ++i) {
512                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
513                 if (arch_register_req_is(&req, limited)) {
514                         u2_has_constr = 1;
515                         break;
516                 }
517         }
518
519         if (u1_has_constr != u2_has_constr)
520                 return u2_has_constr - u1_has_constr;
521
522         /* Now check, whether the two units are connected */
523 #if 0
524         for (i=0; i<u1->node_count; ++i)
525                 for (o=0; o<u2->node_count; ++o)
526                         if (u1->nodes[i] == u2->nodes[o])
527                                 return 0;
528 #endif
529
530         /* After all, the sort key decides. Greater keys come first. */
531         return u2->sort_key - u1->sort_key;
532
533 }
534
535 /**
536  * Sort the ou's according to constraints and their sort_key
537  */
538 static void co_sort_units(copy_opt_t *co) {
539         int i, count = 0, costs;
540         unit_t *ou, **ous;
541
542         /* get the number of ous, remove them form the list and fill the array */
543         list_for_each_entry(unit_t, ou, &co->units, units)
544                 count++;
545         ous = alloca(count * sizeof(*ous));
546
547         costs = co_get_max_copy_costs(co);
548
549         i = 0;
550         list_for_each_entry(unit_t, ou, &co->units, units)
551                 ous[i++] = ou;
552
553         INIT_LIST_HEAD(&co->units);
554
555         assert(count == i && list_empty(&co->units));
556
557         for (i=0; i<count; ++i)
558                 ir_printf("%+F\n", ous[i]->nodes[0]);
559
560         qsort(ous, count, sizeof(*ous), compare_ous);
561
562         ir_printf("\n\n");
563         for (i=0; i<count; ++i)
564                 ir_printf("%+F\n", ous[i]->nodes[0]);
565
566         /* reinsert into list in correct order */
567         for (i=0; i<count; ++i)
568                 list_add_tail(&ous[i]->units, &co->units);
569
570         assert(costs == co_get_max_copy_costs(co));
571 }
572 #endif
573
574 void co_build_ou_structure(copy_opt_t *co) {
575         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
576         INIT_LIST_HEAD(&co->units);
577         irg_walk_graph(co->irg, co_collect_units, NULL, co);
578 #ifdef QUICK_AND_DIRTY_HACK
579         co_sort_units(co);
580 #endif
581 }
582
583 void co_free_ou_structure(copy_opt_t *co) {
584         unit_t *curr, *tmp;
585         ASSERT_OU_AVAIL(co);
586         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
587                 xfree(curr->nodes);
588                 xfree(curr->costs);
589                 xfree(curr);
590         }
591         co->units.next = NULL;
592 }
593
594 /* co_solve_heuristic() is implemented in becopyheur.c */
595
596 int co_get_max_copy_costs(const copy_opt_t *co) {
597         int i, res = 0;
598         unit_t *curr;
599
600         ASSERT_OU_AVAIL(co);
601
602         list_for_each_entry(unit_t, curr, &co->units, units) {
603                 res += curr->inevitable_costs;
604                 for (i=1; i<curr->node_count; ++i)
605                         res += curr->costs[i];
606         }
607         return res;
608 }
609
610 int co_get_inevit_copy_costs(const copy_opt_t *co) {
611         int res = 0;
612         unit_t *curr;
613
614         ASSERT_OU_AVAIL(co);
615
616         list_for_each_entry(unit_t, curr, &co->units, units)
617                 res += curr->inevitable_costs;
618         return res;
619 }
620
621 int co_get_copy_costs(const copy_opt_t *co) {
622         int i, res = 0;
623         unit_t *curr;
624
625         ASSERT_OU_AVAIL(co);
626
627         list_for_each_entry(unit_t, curr, &co->units, units) {
628                 int root_col = get_irn_col(co, curr->nodes[0]);
629                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
630                 res += curr->inevitable_costs;
631                 for (i=1; i<curr->node_count; ++i) {
632                         int arg_col = get_irn_col(co, curr->nodes[i]);
633                         if (root_col != arg_col) {
634                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
635                                 res += curr->costs[i];
636                         }
637                 }
638         }
639         return res;
640 }
641
642 int co_get_lower_bound(const copy_opt_t *co) {
643         int res = 0;
644         unit_t *curr;
645
646         ASSERT_OU_AVAIL(co);
647
648         list_for_each_entry(unit_t, curr, &co->units, units)
649                 res += curr->inevitable_costs + curr->min_nodes_costs;
650         return res;
651 }
652
653 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
654 {
655         bitset_t *seen = bitset_irg_malloc(co->irg);
656         affinity_node_t *an;
657
658         memset(stat, 0, sizeof(stat[0]));
659
660         /* count affinity edges. */
661         co_gs_foreach_aff_node(co, an) {
662                 neighb_t *neigh;
663                 stat->aff_nodes += 1;
664                 bitset_add_irn(seen, an->irn);
665                 co_gs_foreach_neighb(an, neigh) {
666                         if(!bitset_contains_irn(seen, neigh->irn)) {
667                                 stat->aff_edges += 1;
668                                 stat->max_costs += neigh->costs;
669
670                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
671                                         stat->costs += neigh->costs;
672                                         stat->unsatisfied_edges += 1;
673                                 }
674
675                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
676                                         stat->aff_int += 1;
677                                         stat->inevit_costs += neigh->costs;
678                                 }
679
680                         }
681                 }
682         }
683
684         bitset_free(seen);
685 }
686
687 /******************************************************************************
688    _____                 _        _____ _
689   / ____|               | |      / ____| |
690  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
691  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
692  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
693   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
694                   | |                                       __/ |
695                   |_|                                      |___/
696  ******************************************************************************/
697
698 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
699         const affinity_node_t *n1 = k1;
700         const affinity_node_t *n2 = k2;
701
702         return (n1->irn != n2->irn);
703 }
704
705 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
706         affinity_node_t new_node, *node;
707         neighb_t new_nbr, *nbr;
708         int allocnew;
709
710         new_node.irn        = n1;
711         new_node.degree     = 0;
712         new_node.neighbours = NULL;
713         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
714
715         allocnew = 1;
716         for (nbr = node->neighbours; nbr; nbr = nbr->next)
717                 if (nbr->irn == n2) {
718                         allocnew = 0;
719                         break;
720                 }
721
722         /* if we did not find n2 in n1's neighbourhood insert it */
723         if (allocnew) {
724                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
725                 nbr = obstack_finish(&co->obst);
726                 nbr->irn   = n2;
727                 nbr->costs = 0;
728                 nbr->next  = node->neighbours;
729                 node->neighbours = nbr;
730                 node->degree++;
731         }
732
733         /* now nbr points to n1's neighbour-entry of n2 */
734         nbr->costs += costs;
735 }
736
737 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
738         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
739                 add_edge(co, n1, n2, costs);
740                 add_edge(co, n2, n1, costs);
741         }
742 }
743
744 static void build_graph_walker(ir_node *irn, void *env) {
745         copy_opt_t *co = env;
746         int pos, max;
747         arch_register_req_t req;
748         const arch_register_t *reg;
749
750         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
751                 return;
752
753         reg = arch_get_irn_register(co->aenv, irn);
754         if (arch_register_type_is(reg, ignore))
755                 return;
756
757         /* Phis */
758         if (is_Reg_Phi(irn))
759                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
760                         ir_node *arg = get_irn_n(irn, pos);
761                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
762                 }
763
764         /* Perms */
765         else if (is_Perm_Proj(co->aenv, irn)) {
766                 ir_node *arg = get_Perm_src(irn);
767                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
768         }
769
770         /* 2-address code */
771         else if (is_2addr_code(co->aenv, irn, &req))
772                 add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0));
773 }
774
775 void co_build_graph_structure(copy_opt_t *co) {
776         obstack_init(&co->obst);
777         co->nodes = new_set(compare_affinity_node_t, 32);
778
779         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
780 }
781
782 void co_free_graph_structure(copy_opt_t *co) {
783         ASSERT_GS_AVAIL(co);
784
785         del_set(co->nodes);
786         obstack_free(&co->obst, NULL);
787         co->nodes = NULL;
788 }
789
790 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
791
792 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
793         affinity_node_t new_node, *n;
794
795         ASSERT_GS_AVAIL(co);
796
797         new_node.irn = irn;
798         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
799         if (n) {
800                 return (n->degree > 0);
801         } else
802                 return 0;
803 }
804
805 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
806 {
807         be_ifg_t *ifg  = co->cenv->ifg;
808         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
809         bitset_t *adm  = bitset_alloca(co->cls->n_regs);
810
811         ir_node *irn;
812         void *it, *nit;
813         int i, n, n_regs;
814
815         n_regs = 0;
816         for(i = 0; i < co->cls->n_regs; ++i) {
817                 const arch_register_t *reg = &co->cls->regs[i];
818                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
819         }
820
821         /*
822          * n contains the first node number.
823          * the values below n are the pre-colored register nodes
824          */
825
826         it  = be_ifg_nodes_iter_alloca(ifg);
827         nit = be_ifg_neighbours_iter_alloca(ifg);
828
829         n = n_regs;
830         be_ifg_foreach_node(ifg, it, irn) {
831                 if(!arch_irn_is(co->aenv, irn, ignore))
832                         set_irn_link(irn, INT_TO_PTR(n++));
833         }
834
835         fprintf(f, "%d %d\n", n, n_regs);
836
837         be_ifg_foreach_node(ifg, it, irn) {
838                 if(!arch_irn_is(co->aenv, irn, ignore)) {
839                         int idx            = PTR_TO_INT(get_irn_link(irn));
840                         affinity_node_t *a = get_affinity_info(co, irn);
841
842                         arch_register_req_t req;
843                         ir_node *adj;
844
845                         arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0));
846                         if(arch_register_req_is(&req, limited)) {
847                                 bitset_clear_all(adm);
848                                 req.limited(req.limited_env, adm);
849                                 for(i = 0; i < co->cls->n_regs; ++i)
850                                         if(!bitset_is_set(adm, i) && color_map[i] >= 0)
851                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
852
853                         }
854
855
856                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
857                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
858                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
859                                         if(idx < adj_idx)
860                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
861                                 }
862                         }
863
864                         if(a) {
865                                 neighb_t *n;
866
867                                 co_gs_foreach_neighb(a, n) {
868                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
869                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
870                                                 if(idx < n_idx)
871                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
872                                         }
873                                 }
874                         }
875                 }
876         }
877 }
878
879 typedef struct _appel_clique_walker_t {
880         phase_t ph;
881         const copy_opt_t *co;
882         int curr_nr;
883         int node_count;
884         FILE *f;
885         int dumb;
886         int *color_map;
887         struct obstack obst;
888 } appel_clique_walker_t;
889
890 typedef struct _appel_block_info_t {
891         int *live_end_nr;
892         int *live_in_nr;
893         int *phi_nr;
894         ir_node **live_end;
895         ir_node **live_in;
896         ir_node **phi;
897         int n_live_end;
898         int n_live_in;
899         int n_phi;
900 } appel_block_info_t;
901
902 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
903 {
904 #if 0
905         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
906         int res = (int) freq;
907         return res == 0 ? 1 : res;
908 #else
909         ir_loop *loop = get_irn_loop(bl);
910         if(loop) {
911                 int d = get_loop_depth(loop);
912                 return 1 + d * d;
913         }
914         return 1;
915 #endif
916 }
917
918 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
919 {
920         appel_block_info_t *res = NULL;
921
922         if(is_Block(irn)) {
923                 appel_clique_walker_t *d = (void *) phase;
924                 res = phase_alloc(phase, sizeof(res[0]));
925                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
926                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
927                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
928                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
929                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
930                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
931         }
932
933         return res;
934 }
935
936 typedef struct _insn_list_t {
937         be_insn_t *insn;
938         struct list_head list;
939 } insn_list_t;
940
941 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
942 {
943         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
944         int i;
945
946         for(i = 0; i < bli->n_live_end; ++i)
947                 if(bli->live_end[i] == irn)
948                         return bli->live_end_nr[i];
949
950         return -1;
951 }
952
953 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
954 {
955         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
956         ir_node *irn;
957         int n_live;
958         int j;
959
960         n_live = 0;
961         foreach_pset(live, irn)
962                 live_arr[n_live++] = irn;
963
964         /* dump the live after clique */
965         if(!env->dumb) {
966                 for(j = 0; j < n_live; ++j) {
967                         int k;
968
969                         for(k = j + 1; k < n_live; ++k) {
970                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
971                         }
972                         fprintf(env->f, "\n");
973                 }
974         }
975
976         /* dump the affinities */
977         for(j = 0; !env->dumb && j < n_live; ++j) {
978                 ir_node *irn = live_arr[j];
979                 int old_nr = PTR_TO_INT(get_irn_link(irn));
980
981                 /* if the node was already live in the last insn dump the affinity */
982                 if(old_nr > start_nr) {
983                         int weight = appel_aff_weight(env, bl);
984                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
985                 }
986         }
987
988         /* set the current numbers into the link field. */
989         for(j = 0; j < n_live; ++j) {
990                 ir_node *irn = live_arr[j];
991                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
992         }
993
994         return curr_nr + n_live;
995 }
996
997 static void appel_walker(ir_node *bl, void *data)
998 {
999         appel_clique_walker_t *env = data;
1000         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1001         struct obstack *obst       = &env->obst;
1002         void *base                 = obstack_base(obst);
1003         pset *live                 = pset_new_ptr_default();
1004         be_lv_t *lv                = env->co->cenv->birg->lv;
1005
1006         int n_insns  = 0;
1007         int n_nodes  = 0;
1008         int start_nr = env->curr_nr;
1009         int curr_nr  = start_nr;
1010
1011         be_insn_env_t insn_env;
1012         int i, j;
1013         ir_node *irn;
1014         be_insn_t **insns;
1015
1016         insn_env.aenv = env->co->aenv;
1017         insn_env.cls  = env->co->cls;
1018         insn_env.obst = obst;
1019         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1020
1021         /* Guess how many insns will be in this block. */
1022         sched_foreach(bl, irn)
1023                 n_nodes++;
1024
1025         bli->n_phi = 0;
1026         insns = malloc(n_nodes * sizeof(insns[0]));
1027
1028         /* Put all insns in an array. */
1029         irn = sched_first(bl);
1030         while(!sched_is_end(irn)) {
1031                 be_insn_t *insn;
1032                 insn = be_scan_insn(&insn_env, irn);
1033                 insns[n_insns++] = insn;
1034                 irn = insn->next_insn;
1035         }
1036
1037         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
1038         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1039
1040         /* Generate the bad and ugly. */
1041         for(i = n_insns - 1; i >= 0; --i) {
1042                 be_insn_t *insn = insns[i];
1043
1044                 /* The first live set has to be saved in the block border set. */
1045                 if(i == n_insns - 1) {
1046                         j = 0;
1047                         foreach_pset(live, irn) {
1048                                 bli->live_end[j]    = irn;
1049                                 bli->live_end_nr[j] = curr_nr + j;
1050                                 ++j;
1051                         }
1052                         bli->n_live_end = j;
1053                 }
1054
1055                 if(!env->dumb) {
1056                         for(j = 0; j < insn->use_start; ++j) {
1057                                 ir_node *op   = insn->ops[j].carrier;
1058                                 bitset_t *adm = insn->ops[j].regs;
1059                                 int k;
1060                                 int nr;
1061
1062                                 if(!insn->ops[j].has_constraints)
1063                                         continue;
1064
1065                                 nr = 0;
1066                                 foreach_pset(live, irn) {
1067                                         if(irn == op) {
1068                                                 pset_break(live);
1069                                                 break;
1070                                         }
1071                                         ++nr;
1072                                 }
1073
1074                                 assert(nr < pset_count(live));
1075
1076                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1077                                         int mapped_col = env->color_map[k];
1078                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1079                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1080                                 }
1081                         }
1082                 }
1083
1084                 /* dump the clique and update the stuff. */
1085                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1086
1087                 /* remove all defs. */
1088                 for(j = 0; j < insn->use_start; ++j)
1089                         pset_remove_ptr(live, insn->ops[j].carrier);
1090
1091                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1092                         bli->phi[bli->n_phi]    = insn->irn;
1093                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1094                         bli->n_phi++;
1095                 }
1096
1097                 /* add all uses */
1098                 else
1099                         for(j = insn->use_start; j < insn->n_ops; ++j)
1100                                 pset_insert_ptr(live, insn->ops[j].carrier);
1101         }
1102
1103         /* print the start clique. */
1104         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1105
1106         i = 0;
1107         foreach_pset(live, irn) {
1108                 bli->live_in[i]    = irn;
1109                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1110                 ++i;
1111         }
1112         bli->n_live_in = i;
1113
1114         del_pset(live);
1115         free(insns);
1116         obstack_free(obst, base);
1117         env->curr_nr = curr_nr;
1118 }
1119
1120 static void appel_inter_block_aff(ir_node *bl, void *data)
1121 {
1122         appel_clique_walker_t *env = data;
1123         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1124
1125         int i, j, n;
1126
1127         for(i = 0; i < bli->n_live_in; ++i) {
1128                 ir_node *irn = bli->live_in[i];
1129
1130                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1131                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1132
1133                         int nr = appel_get_live_end_nr(env, pred, irn);
1134                         assert(nr >= 0);
1135                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1136                 }
1137         }
1138
1139         for(i = 0; i < bli->n_phi; ++i) {
1140                 ir_node *irn = bli->phi[i];
1141
1142                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1143                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1144                         ir_node *op = get_irn_n(irn, j);
1145
1146                         int nr = appel_get_live_end_nr(env, pred, op);
1147                         assert(nr >= 0);
1148                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1149                 }
1150         }
1151
1152 }
1153
1154 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1155 {
1156         int i;
1157         int n_colors;
1158         appel_clique_walker_t env;
1159         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1160         be_lv_t *lv = co->cenv->birg->lv;
1161
1162         be_liveness_recompute(lv);
1163         obstack_init(&env.obst);
1164         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1165         env.curr_nr = co->cls->n_regs;
1166         env.co = co;
1167         env.f = f;
1168
1169         bitset_copy(adm, co->cenv->ignore_colors);
1170         bitset_flip_all(adm);
1171
1172         /* Make color map. */
1173         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1174         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1175                 const arch_register_t *reg = &co->cls->regs[i];
1176                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1177         }
1178
1179         env.dumb = 1;
1180         env.curr_nr = n_colors;
1181         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1182         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1183
1184         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1185
1186         /* make the first k nodes interfere */
1187         for(i = 0; i < n_colors; ++i) {
1188                 int j;
1189                 for(j = i + 1; j < n_colors; ++j)
1190                         fprintf(f, "%d %d -1 ", i, j);
1191                 fprintf(f, "\n");
1192         }
1193
1194         env.dumb = 0;
1195         env.curr_nr = n_colors;
1196         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1197         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1198         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1199         obstack_free(&env.obst, NULL);
1200 }
1201
1202 /*
1203          ___ _____ ____   ____   ___ _____   ____                        _
1204         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1205          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1206          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1207         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1208                                                                   |_|            |___/
1209 */
1210
1211 static const char *get_dot_color_name(int col)
1212 {
1213         static const char *names[] = {
1214                 "blue",
1215                 "red",
1216                 "green",
1217                 "yellow",
1218                 "cyan",
1219                 "magenta",
1220                 "orange",
1221                 "chocolate",
1222                 "beige",
1223                 "navy",
1224                 "darkgreen",
1225                 "darkred",
1226                 "lightPink",
1227                 "chartreuse",
1228                 "lightskyblue",
1229                 "linen",
1230                 "pink",
1231                 "lightslateblue",
1232                 "mintcream",
1233                 "red",
1234                 "darkolivegreen",
1235                 "mediumblue",
1236                 "mistyrose",
1237                 "salmon",
1238                 "darkseagreen",
1239                 "mediumslateblue"
1240                 "moccasin",
1241                 "tomato",
1242                 "forestgreen",
1243                 "darkturquoise",
1244                 "palevioletred"
1245         };
1246
1247         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1248 }
1249
1250 typedef struct _co_ifg_dump_t {
1251         const copy_opt_t *co;
1252         unsigned flags;
1253 } co_ifg_dump_t;
1254
1255 static void ifg_dump_graph_attr(FILE *f, void *self)
1256 {
1257         fprintf(f, "overlap=scale");
1258 }
1259
1260 static int ifg_is_dump_node(void *self, ir_node *irn)
1261 {
1262         co_ifg_dump_t *cod = self;
1263         return !arch_irn_is(cod->co->aenv, irn, ignore);
1264 }
1265
1266 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1267 {
1268         co_ifg_dump_t *env         = self;
1269         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1270         arch_register_req_t req;
1271         int limited;
1272
1273         arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0));
1274         limited = arch_register_req_is(&req, limited);
1275
1276         if(env->flags & CO_IFG_DUMP_LABELS) {
1277                 ir_fprintf(f, "label=\"%+F", irn);
1278
1279                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1280                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1281                         req.limited(req.limited_env, bs);
1282                         ir_fprintf(f, "\\n%B", bs);
1283                 }
1284                 ir_fprintf(f, "\" ");
1285         }
1286
1287         else
1288                 fprintf(f, "label=\"\" shape=point " );
1289
1290         if(env->flags & CO_IFG_DUMP_SHAPE)
1291                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1292
1293         if(env->flags & CO_IFG_DUMP_COLORS)
1294                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1295 }
1296
1297 static void ifg_dump_at_end(FILE *file, void *self)
1298 {
1299         co_ifg_dump_t *env = self;
1300         affinity_node_t *a;
1301
1302         co_gs_foreach_aff_node(env->co, a) {
1303                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1304                 unsigned aidx = get_irn_idx(a->irn);
1305                 neighb_t *n;
1306
1307                 co_gs_foreach_neighb(a, n) {
1308                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1309                         unsigned nidx = get_irn_idx(n->irn);
1310
1311                         if(aidx < nidx) {
1312                                 const char *color = nr == ar ? "blue" : "red";
1313                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1314                                 if(env->flags & CO_IFG_DUMP_LABELS)
1315                                         fprintf(file, "label=\"%d\" ", n->costs);
1316                                 if(env->flags & CO_IFG_DUMP_COLORS)
1317                                         fprintf(file, "color=%s ", color);
1318                                 else
1319                                         fprintf(file, "style=dotted");
1320                                 fprintf(file, "];\n");
1321                         }
1322                 }
1323         }
1324 }
1325
1326
1327 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1328         ifg_is_dump_node,
1329         ifg_dump_graph_attr,
1330         ifg_dump_node_attr,
1331         NULL,
1332         NULL,
1333         ifg_dump_at_end
1334 };
1335
1336
1337
1338 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1339 {
1340         co_ifg_dump_t cod;
1341
1342         cod.co    = co;
1343         cod.flags = flags;
1344         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1345 }
1346
1347
1348 void co_solve_park_moon(copy_opt_t *opt)
1349 {
1350
1351 }
1352
1353 static int void_algo(copy_opt_t *co)
1354 {
1355         return 0;
1356 }
1357
1358 /*
1359                 _    _                  _ _   _
1360            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1361           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1362          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1363         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1364                            |___/
1365 */
1366
1367 typedef struct {
1368         co_algo_t *algo;
1369         const char *name;
1370         int can_improve_existing;
1371 } co_algo_info_t;
1372
1373 static co_algo_info_t algos[] = {
1374         { void_algo,                "none",  0 },
1375         { co_solve_heuristic,       "heur1", 0 },
1376         { co_solve_heuristic_new,   "heur2", 0 },
1377         { co_solve_heuristic_java,  "heur3", 0 },
1378 #ifdef WITH_ILP
1379         { co_solve_ilp2,            "ilp",   1 },
1380 #endif
1381         { NULL,                     "",      0 }
1382 };
1383
1384 /*
1385     __  __       _         ____       _
1386    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1387    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1388    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1389    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1390
1391 */
1392
1393 void co_driver(be_chordal_env_t *cenv)
1394 {
1395 #ifdef WITH_LIBCORE
1396         lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
1397 #endif
1398         co_complete_stats_t before, after;
1399         copy_opt_t *co;
1400         co_algo_t  *algo_func;
1401         int was_optimal = 0;
1402
1403         if(algo < 0 || algo >= CO_ALGO_LAST)
1404                 return;
1405
1406         co = new_copy_opt(cenv, cost_func);
1407         co_build_ou_structure(co);
1408         co_build_graph_structure(co);
1409
1410         co_complete_stats(co, &before);
1411
1412         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1413         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1414         be_stat_ev_ull("co_max_costs",    before.max_costs);
1415         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1416         be_stat_ev_ull("co_aff_int",      before.aff_int);
1417
1418         be_stat_ev_ull("co_init_costs",   before.costs);
1419         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1420
1421         /* Dump the interference graph in Appel's format. */
1422         if(dump_flags & DUMP_APPEL) {
1423                 FILE *f = be_chordal_open(cenv, "", ".apl");
1424                 co_dump_appel_graph(co, f);
1425                 fclose(f);
1426         }
1427
1428         if(dump_flags & DUMP_BEFORE) {
1429                 FILE *f = be_chordal_open(cenv, "", "-before.dot");
1430                 co_dump_ifg_dot(co, f, style_flags);
1431                 fclose(f);
1432         }
1433
1434         /* if the algo can improve results, provide an initial solution with heur3 */
1435         if(improve && algos[algo].can_improve_existing) {
1436                 co_complete_stats_t stats;
1437
1438                 /* produce a heuristical solution */
1439                 co_solve_heuristic_java(co);
1440
1441                 /* do the stats and provide the current costs */
1442                 co_complete_stats(co, &stats);
1443                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1444         }
1445
1446 #ifdef WITH_JVM
1447         /* start the JVM here so that it does not tamper the timing. */
1448         if(algo == CO_ALGO_HEUR3)
1449                 be_java_coal_start_jvm();
1450 #endif
1451
1452         algo_func = algos[algo].algo;
1453
1454 #ifdef WITH_LIBCORE
1455         lc_timer_reset_and_start(timer);
1456 #endif
1457
1458         was_optimal = algo_func(co);
1459
1460 #ifdef WITH_LIBCORE
1461         lc_timer_stop(timer);
1462         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1463 #endif
1464
1465         be_stat_ev_ull("co_optimal", was_optimal);
1466
1467         if(dump_flags & DUMP_AFTER) {
1468                 FILE *f = be_chordal_open(cenv, "", "-after.dot");
1469                 co_dump_ifg_dot(co, f, style_flags);
1470                 fclose(f);
1471         }
1472
1473         co_complete_stats(co, &after);
1474
1475         if(do_stats) {
1476                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1477                 ulong64 evitable          = after.costs     - after.inevit_costs;
1478
1479                 ir_printf("%30F ", cenv->irg);
1480                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1481
1482                 if(optimizable_costs > 0)
1483                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1484                 else
1485                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1486         }
1487
1488         be_stat_ev_ull("co_after_costs", after.costs);
1489         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1490
1491         co_free_graph_structure(co);
1492         co_free_ou_structure(co);
1493         free_copy_opt(co);
1494 }