a6317d288922f9adbc6ad70ba8c06ac1afa6e0bf
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #ifdef HAVE_ALLOCA_H
11 #include <alloca.h>
12 #endif
13 #ifdef HAVE_MALLOC_H
14 #include <malloc.h>
15 #endif
16
17 #include "execfreq.h"
18 #include "xmalloc.h"
19 #include "debug.h"
20 #include "pmap.h"
21 #include "irgraph.h"
22 #include "irgwalk.h"
23 #include "irprog.h"
24 #include "irloop_t.h"
25 #include "iredges_t.h"
26 #include "phiclass.h"
27 #include "irbitset.h"
28 #include "irphase_t.h"
29 #include "irprintf_t.h"
30
31
32 #include "bearch.h"
33 #include "benode_t.h"
34 #include "beutil.h"
35 #include "beifg_t.h"
36 #include "becopyopt_t.h"
37 #include "becopystat.h"
38 #include "belive_t.h"
39 #include "beinsn_t.h"
40 #include "besched_t.h"
41 #include "benodesets.h"
42 #include "bejavacoal.h"
43 #include "bestatevent.h"
44
45 #ifdef WITH_LIBCORE
46 #include <libcore/lc_timing.h>
47 #include <libcore/lc_opts.h>
48 #endif /* WITH_LIBCORE */
49
50 #define DUMP_BEFORE 1
51 #define DUMP_AFTER  2
52 #define DUMP_APPEL  4
53 #define DUMP_ALL    2 * DUMP_APPEL - 1
54
55 #define COST_FUNC_FREQ     1
56 #define COST_FUNC_LOOP     2
57 #define COST_FUNC_ALL_ONE  3
58
59 static unsigned dump_flags    = 0;
60 static unsigned style_flags   = 0;
61 static unsigned  do_stats     = 0;
62 static cost_fct_t cost_func   = co_get_costs_exec_freq;
63 static unsigned algo          = CO_ALGO_HEUR2;
64 static int improve            = 1;
65
66 #ifdef WITH_LIBCORE
67 static const lc_opt_enum_mask_items_t dump_items[] = {
68         { "before",  DUMP_BEFORE },
69         { "after",   DUMP_AFTER  },
70         { "appel",   DUMP_APPEL  },
71         { "all",     DUMP_ALL    },
72         { NULL,      0 }
73 };
74
75 static const lc_opt_enum_mask_items_t style_items[] = {
76         { "color",   CO_IFG_DUMP_COLORS },
77         { "labels",  CO_IFG_DUMP_LABELS },
78         { "constr",  CO_IFG_DUMP_CONSTR },
79         { "shape",   CO_IFG_DUMP_SHAPE  },
80         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
81         { NULL,      0 }
82 };
83
84 static const lc_opt_enum_mask_items_t algo_items[] = {
85         { "none",   CO_ALGO_NONE  },
86         { "heur",   CO_ALGO_HEUR  },
87         { "heur2",  CO_ALGO_HEUR2 },
88 #ifdef WITH_JVM
89         { "heur3",  CO_ALGO_HEUR3 },
90 #endif /* WITH_JVM */
91 #ifdef WITH_ILP
92         { "ilp",    CO_ALGO_ILP   },
93 #endif /* WITH_ILP */
94         { NULL,     0 }
95 };
96
97 typedef int (*opt_funcptr)(void);
98
99 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
100         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
101         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
102         { "one",    (opt_funcptr) co_get_costs_all_one },
103         { NULL,     NULL }
104 };
105
106 static lc_opt_enum_mask_var_t dump_var = {
107         &dump_flags, dump_items
108 };
109
110 static lc_opt_enum_mask_var_t style_var = {
111         &style_flags, style_items
112 };
113
114 static lc_opt_enum_mask_var_t algo_var = {
115         &algo, algo_items
116 };
117
118 static lc_opt_enum_func_ptr_var_t cost_func_var = {
119         (opt_funcptr*) &cost_func, cost_func_items
120 };
121
122 static const lc_opt_table_entry_t options[] = {
123         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
124         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
125         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
126         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
127         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
128         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
129         { NULL }
130 };
131
132 /* Insert additional options registration functions here. */
133 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
134 extern void be_co2_register_options(lc_opt_entry_t *grp);
135 extern void be_co3_register_options(lc_opt_entry_t *grp);
136
137 void co_register_options(lc_opt_entry_t *grp)
138 {
139         lc_opt_entry_t *co_grp = lc_opt_get_grp(grp, "co");
140         lc_opt_add_table(co_grp, options);
141
142         be_co2_register_options(co_grp);
143         be_co3_register_options(co_grp);
144 #ifdef WITH_ILP
145         be_co_ilp_register_options(co_grp);
146 #endif
147 }
148 #endif
149
150
151 #undef QUICK_AND_DIRTY_HACK
152
153 /******************************************************************************
154     _____                           _
155    / ____|                         | |
156   | |  __  ___ _ __   ___ _ __ __ _| |
157   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
158   | |__| |  __/ | | |  __/ | | (_| | |
159    \_____|\___|_| |_|\___|_|  \__,_|_|
160
161  ******************************************************************************/
162
163 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
164
165
166 void be_copy_opt_init(void) {
167 }
168
169 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
170 {
171         const char *s1, *s2, *s3;
172         int len;
173         copy_opt_t *co;
174
175         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
176
177         co = xcalloc(1, sizeof(*co));
178         co->cenv      = chordal_env;
179         co->aenv      = chordal_env->birg->main_env->arch_env;
180         co->irg       = chordal_env->irg;
181         co->cls       = chordal_env->cls;
182         co->get_costs = get_costs;
183
184         s1 = get_irp_prog_name();
185         s2 = get_entity_name(get_irg_entity(co->irg));
186         s3 = chordal_env->cls->name;
187         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
188         co->name = xmalloc(len);
189         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
190
191         return co;
192 }
193
194 void free_copy_opt(copy_opt_t *co) {
195         xfree(co->name);
196         free(co);
197 }
198
199 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
200         arch_register_req_t req;
201         const arch_register_t *reg;
202
203         if (arch_irn_is(co->aenv, irn, ignore))
204                 return 0;
205
206         reg = arch_get_irn_register(co->aenv, irn);
207         if (arch_register_type_is(reg, ignore))
208                 return 0;
209
210         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req))
211                 return 1;
212
213         return 0;
214 }
215
216 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
217         const ir_edge_t *edge;
218         const arch_register_t *reg;
219
220         assert(0 && "Is buggy and obsolete. Do not use");
221
222         if (arch_irn_is(co->aenv, irn, ignore))
223                 return 0;
224
225         reg = arch_get_irn_register(co->aenv, irn);
226         if (arch_register_type_is(reg, ignore))
227                 return 0;
228
229         foreach_out_edge(irn, edge) {
230                 ir_node *n = edge->src;
231
232                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
233                         arch_register_req_t req;
234                         arch_get_register_req(co->aenv, &req, n, -1);
235
236                         if(is_Reg_Phi(n) ||
237                            is_Perm(co->aenv, n) ||
238                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
239                           )
240                                 return 1;
241                 }
242         }
243
244         return 0;
245 }
246
247 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
248         int cost = 0;
249         ir_loop *loop;
250         ir_node *root_block = get_nodes_block(root);
251
252         if (is_Phi(root)) {
253                 /* for phis the copies are placed in the corresponding pred-block */
254                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
255         } else {
256                 /* a perm places the copy in the same block as it resides */
257                 loop = get_irn_loop(root_block);
258         }
259         if (loop) {
260                 int d = get_loop_depth(loop);
261                 cost = d*d;
262         }
263         return 1+cost;
264 }
265
266 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
267         int res;
268         ir_node *root_bl = get_nodes_block(root);
269         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
270         res = get_block_execfreq_ulong(co->cenv->exec_freq, copy_bl);
271
272         /* don't allow values smaller than one. */
273         return res < 1 ? 1 : res;
274 }
275
276
277 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
278         return 1;
279 }
280
281 /******************************************************************************
282    ____        _   _    _       _ _          _____ _
283   / __ \      | | | |  | |     (_) |        / ____| |
284  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
285  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
286  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
287   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
288         | |                                                            __/ |
289         |_|                                                           |___/
290  ******************************************************************************/
291
292 /**
293  * Determines a maximum weighted independent set with respect to
294  * the interference and conflict edges of all nodes in a qnode.
295  */
296 static int ou_max_ind_set_costs(unit_t *ou) {
297         be_chordal_env_t *chordal_env = ou->co->cenv;
298         ir_node **safe, **unsafe;
299         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
300         bitset_t *curr;
301         int max, pos, curr_weight, best_weight = 0;
302
303         /* assign the nodes into two groups.
304          * safe: node has no interference, hence it is in every max stable set.
305          * unsafe: node has an interference
306          */
307         safe = alloca((ou->node_count-1) * sizeof(*safe));
308         safe_costs = 0;
309         safe_count = 0;
310         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
311         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
312         unsafe_count = 0;
313         for(i=1; i<ou->node_count; ++i) {
314                 int is_safe = 1;
315                 for(o=1; o<ou->node_count; ++o) {
316                         if (i==o)
317                                 continue;
318                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
319                                 unsafe_costs[unsafe_count] = ou->costs[i];
320                                 unsafe[unsafe_count] = ou->nodes[i];
321                                 ++unsafe_count;
322                                 is_safe = 0;
323                                 break;
324                         }
325                 }
326                 if (is_safe) {
327                         safe_costs += ou->costs[i];
328                         safe[safe_count++] = ou->nodes[i];
329                 }
330         }
331
332
333         /* now compute the best set out of the unsafe nodes*/
334         if (unsafe_count > MIS_HEUR_TRIGGER) {
335                 bitset_t *best = bitset_alloca(unsafe_count);
336                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
337                 for (i=0; i<unsafe_count; ++i) {
338                         bitset_set(best, i);
339                         /* check if it is a stable set */
340                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
341                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
342                                         bitset_clear(best, i); /* clear the bit and try next one */
343                                         break;
344                                 }
345                 }
346                 /* compute the weight */
347                 bitset_foreach(best, pos)
348                         best_weight += unsafe_costs[pos];
349         } else {
350                 /* Exact Algorithm: Brute force */
351                 curr = bitset_alloca(unsafe_count);
352                 bitset_set_all(curr);
353                 while ((max = bitset_popcnt(curr)) != 0) {
354                         /* check if curr is a stable set */
355                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
356                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
357                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
358                                                         goto no_stable_set;
359
360                         /* if we arrive here, we have a stable set */
361                         /* compute the weigth of the stable set*/
362                         curr_weight = 0;
363                         bitset_foreach(curr, pos)
364                                 curr_weight += unsafe_costs[pos];
365
366                         /* any better ? */
367                         if (curr_weight > best_weight) {
368                                 best_weight = curr_weight;
369                         }
370
371         no_stable_set:
372                         bitset_minus1(curr);
373                 }
374         }
375
376         return safe_costs+best_weight;
377 }
378
379 static void co_collect_units(ir_node *irn, void *env) {
380         copy_opt_t *co = env;
381         unit_t *unit;
382         arch_register_req_t req;
383
384         if (!is_curr_reg_class(co, irn))
385                 return;
386         if (!co_is_optimizable_root(co, irn))
387                 return;
388
389         /* Init a new unit */
390         unit = xcalloc(1, sizeof(*unit));
391         unit->co = co;
392         unit->node_count = 1;
393         INIT_LIST_HEAD(&unit->queue);
394
395         /* Phi with some/all of its arguments */
396         if (is_Reg_Phi(irn)) {
397                 int i, arity;
398
399                 /* init */
400                 arity = get_irn_arity(irn);
401                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
402                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
403                 unit->nodes[0] = irn;
404
405                 /* fill */
406                 for (i=0; i<arity; ++i) {
407                         int o, arg_pos;
408                         ir_node *arg = get_irn_n(irn, i);
409
410                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
411                         if (arg == irn)
412                                 continue;
413                         if (nodes_interfere(co->cenv, irn, arg)) {
414                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
415                                 continue;
416                         }
417
418                         /* Else insert the argument of the phi to the members of this ou */
419                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
420
421                         /* Check if arg has occurred at a prior position in the arg/list */
422                         arg_pos = 0;
423                         for (o=0; o<unit->node_count; ++o)
424                                 if (unit->nodes[o] == arg) {
425                                         arg_pos = o;
426                                         break;
427                                 }
428
429                         if (!arg_pos) { /* a new argument */
430                                 /* insert node, set costs */
431                                 unit->nodes[unit->node_count] = arg;
432                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
433                                 unit->node_count++;
434                         } else { /* arg has occured before in same phi */
435                                 /* increase costs for existing arg */
436                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
437                         }
438                 }
439                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
440                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
441         } else
442
443         /* Proj of a perm with corresponding arg */
444         if (is_Perm_Proj(co->aenv, irn)) {
445                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
446                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
447                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
448                 unit->node_count = 2;
449                 unit->nodes[0] = irn;
450                 unit->nodes[1] = get_Perm_src(irn);
451                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
452         } else
453
454         /* Src == Tgt of a 2-addr-code instruction */
455         if (is_2addr_code(co->aenv, irn, &req)) {
456                 ir_node *other = req.other_same;
457                 if (!nodes_interfere(co->cenv, irn, other)) {
458                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
459                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
460                         unit->node_count = 2;
461                         unit->nodes[0] = irn;
462                         unit->nodes[1] = other;
463                         unit->costs[1] = co->get_costs(co, irn, other, -1);
464                 }
465         } else
466                 assert(0 && "This is not an optimizable node!");
467
468         /* Insert the new unit at a position according to its costs */
469         if (unit->node_count > 1) {
470                 int i;
471                 struct list_head *tmp;
472
473                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
474                 for(i=1; i<unit->node_count; ++i) {
475                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
476                         unit->all_nodes_costs += unit->costs[i];
477                 }
478
479                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
480                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
481                 /* Insert the new ou according to its sort_key */
482                 tmp = &co->units;
483                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
484                         tmp = tmp->next;
485                 list_add(&unit->units, tmp);
486         } else {
487                 free(unit);
488         }
489 }
490
491 #ifdef QUICK_AND_DIRTY_HACK
492
493 static int compare_ous(const void *k1, const void *k2) {
494         const unit_t *u1 = *((const unit_t **) k1);
495         const unit_t *u2 = *((const unit_t **) k2);
496         int i, o, u1_has_constr, u2_has_constr;
497         arch_register_req_t req;
498         const arch_env_t *aenv = u1->co->aenv;
499
500         /* Units with constraints come first */
501         u1_has_constr = 0;
502         for (i=0; i<u1->node_count; ++i) {
503                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
504                 if (arch_register_req_is(&req, limited)) {
505                         u1_has_constr = 1;
506                         break;
507                 }
508         }
509
510         u2_has_constr = 0;
511         for (i=0; i<u2->node_count; ++i) {
512                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
513                 if (arch_register_req_is(&req, limited)) {
514                         u2_has_constr = 1;
515                         break;
516                 }
517         }
518
519         if (u1_has_constr != u2_has_constr)
520                 return u2_has_constr - u1_has_constr;
521
522         /* Now check, whether the two units are connected */
523 #if 0
524         for (i=0; i<u1->node_count; ++i)
525                 for (o=0; o<u2->node_count; ++o)
526                         if (u1->nodes[i] == u2->nodes[o])
527                                 return 0;
528 #endif
529
530         /* After all, the sort key decides. Greater keys come first. */
531         return u2->sort_key - u1->sort_key;
532
533 }
534
535 /**
536  * Sort the ou's according to constraints and their sort_key
537  */
538 static void co_sort_units(copy_opt_t *co) {
539         int i, count = 0, costs;
540         unit_t *ou, **ous;
541
542         /* get the number of ous, remove them form the list and fill the array */
543         list_for_each_entry(unit_t, ou, &co->units, units)
544                 count++;
545         ous = alloca(count * sizeof(*ous));
546
547         costs = co_get_max_copy_costs(co);
548
549         i = 0;
550         list_for_each_entry(unit_t, ou, &co->units, units)
551                 ous[i++] = ou;
552
553         INIT_LIST_HEAD(&co->units);
554
555         assert(count == i && list_empty(&co->units));
556
557         for (i=0; i<count; ++i)
558                 ir_printf("%+F\n", ous[i]->nodes[0]);
559
560         qsort(ous, count, sizeof(*ous), compare_ous);
561
562         ir_printf("\n\n");
563         for (i=0; i<count; ++i)
564                 ir_printf("%+F\n", ous[i]->nodes[0]);
565
566         /* reinsert into list in correct order */
567         for (i=0; i<count; ++i)
568                 list_add_tail(&ous[i]->units, &co->units);
569
570         assert(costs == co_get_max_copy_costs(co));
571 }
572 #endif
573
574 void co_build_ou_structure(copy_opt_t *co) {
575         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
576         INIT_LIST_HEAD(&co->units);
577         irg_walk_graph(co->irg, co_collect_units, NULL, co);
578 #ifdef QUICK_AND_DIRTY_HACK
579         co_sort_units(co);
580 #endif
581 }
582
583 void co_free_ou_structure(copy_opt_t *co) {
584         unit_t *curr, *tmp;
585         ASSERT_OU_AVAIL(co);
586         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
587                 xfree(curr->nodes);
588                 xfree(curr->costs);
589                 xfree(curr);
590         }
591         co->units.next = NULL;
592 }
593
594 /* co_solve_heuristic() is implemented in becopyheur.c */
595
596 int co_get_max_copy_costs(const copy_opt_t *co) {
597         int i, res = 0;
598         unit_t *curr;
599
600         ASSERT_OU_AVAIL(co);
601
602         list_for_each_entry(unit_t, curr, &co->units, units) {
603                 res += curr->inevitable_costs;
604                 for (i=1; i<curr->node_count; ++i)
605                         res += curr->costs[i];
606         }
607         return res;
608 }
609
610 int co_get_inevit_copy_costs(const copy_opt_t *co) {
611         int res = 0;
612         unit_t *curr;
613
614         ASSERT_OU_AVAIL(co);
615
616         list_for_each_entry(unit_t, curr, &co->units, units)
617                 res += curr->inevitable_costs;
618         return res;
619 }
620
621 int co_get_copy_costs(const copy_opt_t *co) {
622         int i, res = 0;
623         unit_t *curr;
624
625         ASSERT_OU_AVAIL(co);
626
627         list_for_each_entry(unit_t, curr, &co->units, units) {
628                 int root_col = get_irn_col(co, curr->nodes[0]);
629                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
630                 res += curr->inevitable_costs;
631                 for (i=1; i<curr->node_count; ++i) {
632                         int arg_col = get_irn_col(co, curr->nodes[i]);
633                         if (root_col != arg_col) {
634                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
635                                 res += curr->costs[i];
636                         }
637                 }
638         }
639         return res;
640 }
641
642 int co_get_lower_bound(const copy_opt_t *co) {
643         int res = 0;
644         unit_t *curr;
645
646         ASSERT_OU_AVAIL(co);
647
648         list_for_each_entry(unit_t, curr, &co->units, units)
649                 res += curr->inevitable_costs + curr->min_nodes_costs;
650         return res;
651 }
652
653 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
654 {
655         bitset_t *seen = bitset_irg_malloc(co->irg);
656         affinity_node_t *an;
657
658         memset(stat, 0, sizeof(stat[0]));
659
660         /* count affinity edges. */
661         co_gs_foreach_aff_node(co, an) {
662                 neighb_t *neigh;
663                 stat->aff_nodes += 1;
664                 bitset_add_irn(seen, an->irn);
665                 co_gs_foreach_neighb(an, neigh) {
666                         if(!bitset_contains_irn(seen, neigh->irn)) {
667                                 stat->aff_edges += 1;
668                                 stat->max_costs += neigh->costs;
669
670                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
671                                         stat->costs += neigh->costs;
672                                         stat->unsatisfied_edges += 1;
673                                 }
674
675                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
676                                         stat->aff_int += 1;
677                                         stat->inevit_costs += neigh->costs;
678                                 }
679
680                         }
681                 }
682         }
683
684         bitset_free(seen);
685 }
686
687 /******************************************************************************
688    _____                 _        _____ _
689   / ____|               | |      / ____| |
690  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
691  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
692  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
693   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
694                   | |                                       __/ |
695                   |_|                                      |___/
696  ******************************************************************************/
697
698 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
699         const affinity_node_t *n1 = k1;
700         const affinity_node_t *n2 = k2;
701
702         return (n1->irn != n2->irn);
703 }
704
705 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
706         affinity_node_t new_node, *node;
707         neighb_t new_nbr, *nbr;
708         int allocnew;
709
710         new_node.irn        = n1;
711         new_node.degree     = 0;
712         new_node.neighbours = NULL;
713         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
714
715         allocnew = 1;
716         for (nbr = node->neighbours; nbr; nbr = nbr->next)
717                 if (nbr->irn == n2) {
718                         allocnew = 0;
719                         break;
720                 }
721
722         /* if we did not find n2 in n1's neighbourhood insert it */
723         if (allocnew) {
724                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
725                 nbr = obstack_finish(&co->obst);
726                 nbr->irn   = n2;
727                 nbr->costs = 0;
728                 nbr->next  = node->neighbours;
729                 node->neighbours = nbr;
730                 node->degree++;
731         }
732
733         /* now nbr points to n1's neighbour-entry of n2 */
734         nbr->costs += costs;
735 }
736
737 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
738         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
739                 add_edge(co, n1, n2, costs);
740                 add_edge(co, n2, n1, costs);
741         }
742 }
743
744 static void build_graph_walker(ir_node *irn, void *env) {
745         copy_opt_t *co = env;
746         int pos, max;
747         arch_register_req_t req;
748         const arch_register_t *reg;
749
750         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
751                 return;
752
753         reg = arch_get_irn_register(co->aenv, irn);
754         if (arch_register_type_is(reg, ignore))
755                 return;
756
757         /* Phis */
758         if (is_Reg_Phi(irn))
759                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
760                         ir_node *arg = get_irn_n(irn, pos);
761                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
762                 }
763
764         /* Perms */
765         else if (is_Perm_Proj(co->aenv, irn)) {
766                 ir_node *arg = get_Perm_src(irn);
767                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
768         }
769
770         /* 2-address code */
771         else if (is_2addr_code(co->aenv, irn, &req))
772                 add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0));
773 }
774
775 void co_build_graph_structure(copy_opt_t *co) {
776         obstack_init(&co->obst);
777         co->nodes = new_set(compare_affinity_node_t, 32);
778
779         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
780 }
781
782 void co_free_graph_structure(copy_opt_t *co) {
783         ASSERT_GS_AVAIL(co);
784
785         del_set(co->nodes);
786         obstack_free(&co->obst, NULL);
787         co->nodes = NULL;
788 }
789
790 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
791
792 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
793         affinity_node_t new_node, *n;
794
795         ASSERT_GS_AVAIL(co);
796
797         new_node.irn = irn;
798         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
799         if (n) {
800                 return (n->degree > 0);
801         } else
802                 return 0;
803 }
804
805 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
806 {
807         be_ifg_t *ifg  = co->cenv->ifg;
808         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
809         bitset_t *adm  = bitset_alloca(co->cls->n_regs);
810
811         ir_node *irn;
812         void *it, *nit;
813         int i, n, n_regs;
814
815         n_regs = 0;
816         for(i = 0; i < co->cls->n_regs; ++i) {
817                 const arch_register_t *reg = &co->cls->regs[i];
818                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
819         }
820
821         /*
822          * n contains the first node number.
823          * the values below n are the pre-colored register nodes
824          */
825
826         it  = be_ifg_nodes_iter_alloca(ifg);
827         nit = be_ifg_neighbours_iter_alloca(ifg);
828
829         n = n_regs;
830         be_ifg_foreach_node(ifg, it, irn) {
831                 if(!arch_irn_is(co->aenv, irn, ignore))
832                         set_irn_link(irn, INT_TO_PTR(n++));
833         }
834
835         fprintf(f, "%d %d\n", n, n_regs);
836
837         be_ifg_foreach_node(ifg, it, irn) {
838                 if(!arch_irn_is(co->aenv, irn, ignore)) {
839                         int idx            = PTR_TO_INT(get_irn_link(irn));
840                         affinity_node_t *a = get_affinity_info(co, irn);
841
842                         arch_register_req_t req;
843                         ir_node *adj;
844
845                         arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0));
846                         if(arch_register_req_is(&req, limited)) {
847                                 bitset_clear_all(adm);
848                                 req.limited(req.limited_env, adm);
849                                 for(i = 0; i < co->cls->n_regs; ++i)
850                                         if(!bitset_is_set(adm, i) && color_map[i] >= 0)
851                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
852
853                         }
854
855
856                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
857                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
858                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
859                                         if(idx < adj_idx)
860                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
861                                 }
862                         }
863
864                         if(a) {
865                                 neighb_t *n;
866
867                                 co_gs_foreach_neighb(a, n) {
868                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
869                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
870                                                 if(idx < n_idx)
871                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
872                                         }
873                                 }
874                         }
875                 }
876         }
877 }
878
879 typedef struct _appel_clique_walker_t {
880         phase_t ph;
881         const copy_opt_t *co;
882         int curr_nr;
883         int node_count;
884         FILE *f;
885         int dumb;
886         int *color_map;
887         struct obstack obst;
888 } appel_clique_walker_t;
889
890 typedef struct _appel_block_info_t {
891         int *live_end_nr;
892         int *live_in_nr;
893         int *phi_nr;
894         ir_node **live_end;
895         ir_node **live_in;
896         ir_node **phi;
897         int n_live_end;
898         int n_live_in;
899         int n_phi;
900 } appel_block_info_t;
901
902 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
903 {
904 #if 0
905         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
906         int res = (int) freq;
907         return res == 0 ? 1 : res;
908 #else
909         ir_loop *loop = get_irn_loop(bl);
910         if(loop) {
911                 int d = get_loop_depth(loop);
912                 return 1 + d * d;
913         }
914         return 1;
915 #endif
916 }
917
918 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
919 {
920         appel_block_info_t *res = NULL;
921
922         if(is_Block(irn)) {
923                 appel_clique_walker_t *d = (void *) phase;
924                 res = phase_alloc(phase, sizeof(res[0]));
925                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
926                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
927                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
928                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
929                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
930                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
931         }
932
933         return res;
934 }
935
936 typedef struct _insn_list_t {
937         be_insn_t *insn;
938         struct list_head list;
939 } insn_list_t;
940
941 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
942 {
943         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
944         int i;
945
946         for(i = 0; i < bli->n_live_end; ++i)
947                 if(bli->live_end[i] == irn)
948                         return bli->live_end_nr[i];
949
950         return -1;
951 }
952
953 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
954 {
955         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
956         ir_node *irn;
957         int n_live;
958         int j;
959
960         n_live = 0;
961         foreach_pset(live, irn)
962                 live_arr[n_live++] = irn;
963
964         /* dump the live after clique */
965         if(!env->dumb) {
966                 for(j = 0; j < n_live; ++j) {
967                         int k;
968
969                         for(k = j + 1; k < n_live; ++k) {
970                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
971                         }
972                         fprintf(env->f, "\n");
973                 }
974         }
975
976         /* dump the affinities */
977         for(j = 0; !env->dumb && j < n_live; ++j) {
978                 ir_node *irn = live_arr[j];
979                 int old_nr = PTR_TO_INT(get_irn_link(irn));
980
981                 /* if the node was already live in the last insn dump the affinity */
982                 if(old_nr > start_nr) {
983                         int weight = appel_aff_weight(env, bl);
984                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
985                 }
986         }
987
988         /* set the current numbers into the link field. */
989         for(j = 0; j < n_live; ++j) {
990                 ir_node *irn = live_arr[j];
991                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
992         }
993
994         return curr_nr + n_live;
995 }
996
997 static void appel_walker(ir_node *bl, void *data)
998 {
999         appel_clique_walker_t *env = data;
1000         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1001         struct obstack *obst       = &env->obst;
1002         void *base                 = obstack_base(obst);
1003         pset *live                 = pset_new_ptr_default();
1004
1005         int n_insns  = 0;
1006         int n_nodes  = 0;
1007         int start_nr = env->curr_nr;
1008         int curr_nr  = start_nr;
1009
1010         be_insn_env_t insn_env;
1011         int i, j;
1012         ir_node *irn;
1013         be_insn_t **insns;
1014
1015         insn_env.aenv = env->co->aenv;
1016         insn_env.cls  = env->co->cls;
1017         insn_env.obst = obst;
1018         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1019
1020         /* Guess how many insns will be in this block. */
1021         sched_foreach(bl, irn)
1022                 n_nodes++;
1023
1024         bli->n_phi = 0;
1025         insns = malloc(n_nodes * sizeof(insns[0]));
1026
1027         /* Put all insns in an array. */
1028         irn = sched_first(bl);
1029         while(!sched_is_end(irn)) {
1030                 be_insn_t *insn;
1031                 insn = be_scan_insn(&insn_env, irn);
1032                 insns[n_insns++] = insn;
1033                 irn = insn->next_insn;
1034         }
1035
1036         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
1037         be_liveness_end_of_block(env->co->cenv->lv, env->co->aenv, env->co->cls, bl, live);
1038
1039         /* Generate the bad and ugly. */
1040         for(i = n_insns - 1; i >= 0; --i) {
1041                 be_insn_t *insn = insns[i];
1042
1043                 /* The first live set has to be saved in the block border set. */
1044                 if(i == n_insns - 1) {
1045                         j = 0;
1046                         foreach_pset(live, irn) {
1047                                 bli->live_end[j]    = irn;
1048                                 bli->live_end_nr[j] = curr_nr + j;
1049                                 ++j;
1050                         }
1051                         bli->n_live_end = j;
1052                 }
1053
1054                 if(!env->dumb) {
1055                         for(j = 0; j < insn->use_start; ++j) {
1056                                 ir_node *op   = insn->ops[j].carrier;
1057                                 bitset_t *adm = insn->ops[j].regs;
1058                                 int k;
1059                                 int nr;
1060
1061                                 if(!insn->ops[j].has_constraints)
1062                                         continue;
1063
1064                                 nr = 0;
1065                                 foreach_pset(live, irn) {
1066                                         if(irn == op) {
1067                                                 pset_break(live);
1068                                                 break;
1069                                         }
1070                                         ++nr;
1071                                 }
1072
1073                                 assert(nr < pset_count(live));
1074
1075                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1076                                         int mapped_col = env->color_map[k];
1077                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1078                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1079                                 }
1080                         }
1081                 }
1082
1083                 /* dump the clique and update the stuff. */
1084                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1085
1086                 /* remove all defs. */
1087                 for(j = 0; j < insn->use_start; ++j)
1088                         pset_remove_ptr(live, insn->ops[j].carrier);
1089
1090                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1091                         bli->phi[bli->n_phi]    = insn->irn;
1092                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1093                         bli->n_phi++;
1094                 }
1095
1096                 /* add all uses */
1097                 else
1098                         for(j = insn->use_start; j < insn->n_ops; ++j)
1099                                 pset_insert_ptr(live, insn->ops[j].carrier);
1100         }
1101
1102         /* print the start clique. */
1103         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1104
1105         i = 0;
1106         foreach_pset(live, irn) {
1107                 bli->live_in[i]    = irn;
1108                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1109                 ++i;
1110         }
1111         bli->n_live_in = i;
1112
1113         del_pset(live);
1114         free(insns);
1115         obstack_free(obst, base);
1116         env->curr_nr = curr_nr;
1117 }
1118
1119 static void appel_inter_block_aff(ir_node *bl, void *data)
1120 {
1121         appel_clique_walker_t *env = data;
1122         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1123
1124         int i, j, n;
1125
1126         for(i = 0; i < bli->n_live_in; ++i) {
1127                 ir_node *irn = bli->live_in[i];
1128
1129                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1130                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1131
1132                         int nr = appel_get_live_end_nr(env, pred, irn);
1133                         assert(nr >= 0);
1134                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1135                 }
1136         }
1137
1138         for(i = 0; i < bli->n_phi; ++i) {
1139                 ir_node *irn = bli->phi[i];
1140
1141                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1142                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1143                         ir_node *op = get_irn_n(irn, j);
1144
1145                         int nr = appel_get_live_end_nr(env, pred, op);
1146                         assert(nr >= 0);
1147                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1148                 }
1149         }
1150
1151 }
1152
1153 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1154 {
1155         int i;
1156         int n_colors;
1157         appel_clique_walker_t env;
1158         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1159
1160         be_liveness_recompute(co->cenv->lv);
1161         obstack_init(&env.obst);
1162         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1163         env.curr_nr = co->cls->n_regs;
1164         env.co = co;
1165         env.f = f;
1166
1167         bitset_copy(adm, co->cenv->ignore_colors);
1168         bitset_flip_all(adm);
1169
1170         /* Make color map. */
1171         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1172         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1173                 const arch_register_t *reg = &co->cls->regs[i];
1174                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1175         }
1176
1177         env.dumb = 1;
1178         env.curr_nr = n_colors;
1179         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1180         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1181
1182         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1183
1184         /* make the first k nodes interfere */
1185         for(i = 0; i < n_colors; ++i) {
1186                 int j;
1187                 for(j = i + 1; j < n_colors; ++j)
1188                         fprintf(f, "%d %d -1 ", i, j);
1189                 fprintf(f, "\n");
1190         }
1191
1192         env.dumb = 0;
1193         env.curr_nr = n_colors;
1194         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1195         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1196         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1197         obstack_free(&env.obst, NULL);
1198 }
1199
1200 /*
1201          ___ _____ ____   ____   ___ _____   ____                        _
1202         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1203          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1204          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1205         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1206                                                                   |_|            |___/
1207 */
1208
1209 static const char *get_dot_color_name(int col)
1210 {
1211         static const char *names[] = {
1212                 "blue",
1213                 "red",
1214                 "green",
1215                 "yellow",
1216                 "cyan",
1217                 "magenta",
1218                 "orange",
1219                 "chocolate",
1220                 "beige",
1221                 "navy",
1222                 "darkgreen",
1223                 "darkred",
1224                 "lightPink",
1225                 "chartreuse",
1226                 "lightskyblue",
1227                 "linen",
1228                 "pink",
1229                 "lightslateblue",
1230                 "mintcream",
1231                 "red",
1232                 "darkolivegreen",
1233                 "mediumblue",
1234                 "mistyrose",
1235                 "salmon",
1236                 "darkseagreen",
1237                 "mediumslateblue"
1238                 "moccasin",
1239                 "tomato",
1240                 "forestgreen",
1241                 "darkturquoise",
1242                 "palevioletred"
1243         };
1244
1245         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1246 }
1247
1248 typedef struct _co_ifg_dump_t {
1249         const copy_opt_t *co;
1250         unsigned flags;
1251 } co_ifg_dump_t;
1252
1253 static void ifg_dump_graph_attr(FILE *f, void *self)
1254 {
1255         fprintf(f, "overlap=scale");
1256 }
1257
1258 static int ifg_is_dump_node(void *self, ir_node *irn)
1259 {
1260         co_ifg_dump_t *cod = self;
1261         return !arch_irn_is(cod->co->aenv, irn, ignore);
1262 }
1263
1264 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1265 {
1266         co_ifg_dump_t *env         = self;
1267         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1268         arch_register_req_t req;
1269         int limited;
1270
1271         arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0));
1272         limited = arch_register_req_is(&req, limited);
1273
1274         if(env->flags & CO_IFG_DUMP_LABELS) {
1275                 ir_fprintf(f, "label=\"%+F", irn);
1276
1277                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1278                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1279                         req.limited(req.limited_env, bs);
1280                         ir_fprintf(f, "\\n%B", bs);
1281                 }
1282                 ir_fprintf(f, "\" ");
1283         }
1284
1285         else
1286                 fprintf(f, "label=\"\" shape=point " );
1287
1288         if(env->flags & CO_IFG_DUMP_SHAPE)
1289                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1290
1291         if(env->flags & CO_IFG_DUMP_COLORS)
1292                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1293 }
1294
1295 static void ifg_dump_at_end(FILE *file, void *self)
1296 {
1297         co_ifg_dump_t *env = self;
1298         affinity_node_t *a;
1299
1300         co_gs_foreach_aff_node(env->co, a) {
1301                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1302                 unsigned aidx = get_irn_idx(a->irn);
1303                 neighb_t *n;
1304
1305                 co_gs_foreach_neighb(a, n) {
1306                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1307                         unsigned nidx = get_irn_idx(n->irn);
1308
1309                         if(aidx < nidx) {
1310                                 const char *color = nr == ar ? "blue" : "red";
1311                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1312                                 if(env->flags & CO_IFG_DUMP_LABELS)
1313                                         fprintf(file, "label=\"%d\" ", n->costs);
1314                                 if(env->flags & CO_IFG_DUMP_COLORS)
1315                                         fprintf(file, "color=%s ", color);
1316                                 else
1317                                         fprintf(file, "style=dotted");
1318                                 fprintf(file, "];\n");
1319                         }
1320                 }
1321         }
1322 }
1323
1324
1325 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1326         ifg_is_dump_node,
1327         ifg_dump_graph_attr,
1328         ifg_dump_node_attr,
1329         NULL,
1330         NULL,
1331         ifg_dump_at_end
1332 };
1333
1334
1335
1336 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1337 {
1338         co_ifg_dump_t cod;
1339
1340         cod.co    = co;
1341         cod.flags = flags;
1342         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1343 }
1344
1345
1346 void co_solve_park_moon(copy_opt_t *opt)
1347 {
1348
1349 }
1350
1351 static int void_algo(copy_opt_t *co)
1352 {
1353         return 0;
1354 }
1355
1356 /*
1357                 _    _                  _ _   _
1358            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1359           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1360          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1361         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1362                            |___/
1363 */
1364
1365 typedef struct {
1366         co_algo_t *algo;
1367         const char *name;
1368         int can_improve_existing;
1369 } co_algo_info_t;
1370
1371 static co_algo_info_t algos[] = {
1372         { void_algo,                "none",  0 },
1373         { co_solve_heuristic,       "heur1", 0 },
1374         { co_solve_heuristic_new,   "heur2", 0 },
1375         { co_solve_heuristic_java,  "heur3", 0 },
1376 #ifdef WITH_ILP
1377         { co_solve_ilp2,            "ilp",   1 },
1378 #endif
1379         { NULL,                     "",      0 }
1380 };
1381
1382 /*
1383     __  __       _         ____       _
1384    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1385    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1386    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1387    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1388
1389 */
1390
1391 void co_driver(be_chordal_env_t *cenv)
1392 {
1393 #ifdef WITH_LIBCORE
1394         lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
1395 #endif
1396         co_complete_stats_t before, after;
1397         copy_opt_t *co;
1398         co_algo_t  *algo_func;
1399         int was_optimal = 0;
1400
1401         if(algo < 0 || algo >= CO_ALGO_LAST)
1402                 return;
1403
1404         co = new_copy_opt(cenv, cost_func);
1405         co_build_ou_structure(co);
1406         co_build_graph_structure(co);
1407
1408         co_complete_stats(co, &before);
1409
1410         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1411         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1412         be_stat_ev_ull("co_max_costs",    before.max_costs);
1413         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1414         be_stat_ev_ull("co_aff_int",      before.aff_int);
1415
1416         be_stat_ev_ull("co_init_costs",   before.costs);
1417         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1418
1419         /* Dump the interference graph in Appel's format. */
1420         if(dump_flags & DUMP_APPEL) {
1421                 FILE *f = be_chordal_open(cenv, "", ".apl");
1422                 co_dump_appel_graph(co, f);
1423                 fclose(f);
1424         }
1425
1426         if(dump_flags & DUMP_BEFORE) {
1427                 FILE *f = be_chordal_open(cenv, "", "-before.dot");
1428                 co_dump_ifg_dot(co, f, style_flags);
1429                 fclose(f);
1430         }
1431
1432         /* if the algo can improve results, provide an initial solution with heur3 */
1433         if(improve && algos[algo].can_improve_existing) {
1434                 co_complete_stats_t stats;
1435
1436                 /* produce a heuristical solution */
1437                 co_solve_heuristic_java(co);
1438
1439                 /* do the stats and provide the current costs */
1440                 co_complete_stats(co, &stats);
1441                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1442         }
1443
1444 #ifdef WITH_JVM
1445         /* start the JVM here so that it does not tamper the timing. */
1446         if(algo == CO_ALGO_HEUR3)
1447                 be_java_coal_start_jvm();
1448 #endif
1449
1450         algo_func = algos[algo].algo;
1451
1452 #ifdef WITH_LIBCORE
1453         lc_timer_reset_and_start(timer);
1454 #endif
1455
1456         was_optimal = algo_func(co);
1457
1458 #ifdef WITH_LIBCORE
1459         lc_timer_stop(timer);
1460         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1461 #endif
1462
1463         be_stat_ev_ull("co_optimal", was_optimal);
1464
1465         if(dump_flags & DUMP_AFTER) {
1466                 FILE *f = be_chordal_open(cenv, "", "-after.dot");
1467                 co_dump_ifg_dot(co, f, style_flags);
1468                 fclose(f);
1469         }
1470
1471         co_complete_stats(co, &after);
1472
1473         if(do_stats) {
1474                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1475                 ulong64 evitable          = after.costs     - after.inevit_costs;
1476
1477                 ir_printf("%30F ", cenv->irg);
1478                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1479
1480                 if(optimizable_costs > 0)
1481                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1482                 else
1483                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1484         }
1485
1486         be_stat_ev_ull("co_after_costs", after.costs);
1487         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1488
1489         co_free_graph_structure(co);
1490         co_free_ou_structure(co);
1491         free_copy_opt(co);
1492 }