8d2c61868dd3a50c834c9db2d660d0e338327c65
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #ifdef HAVE_ALLOCA_H
11 #include <alloca.h>
12 #endif
13 #ifdef HAVE_MALLOC_H
14 #include <malloc.h>
15 #endif
16
17 #include "execfreq.h"
18 #include "xmalloc.h"
19 #include "debug.h"
20 #include "pmap.h"
21 #include "irgraph.h"
22 #include "irgwalk.h"
23 #include "irprog.h"
24 #include "irloop_t.h"
25 #include "iredges_t.h"
26 #include "phiclass.h"
27 #include "irbitset.h"
28 #include "irphase_t.h"
29 #include "irprintf_t.h"
30
31
32 #include "bearch.h"
33 #include "benode_t.h"
34 #include "beutil.h"
35 #include "beifg_t.h"
36 #include "becopyopt_t.h"
37 #include "becopystat.h"
38 #include "belive_t.h"
39 #include "beinsn_t.h"
40 #include "besched_t.h"
41 #include "benodesets.h"
42 #include "bejavacoal.h"
43 #include "bestatevent.h"
44
45 #ifdef WITH_LIBCORE
46 #include <libcore/lc_timing.h>
47 #include <libcore/lc_opts.h>
48 #endif /* WITH_LIBCORE */
49
50 #define DUMP_BEFORE 1
51 #define DUMP_AFTER  2
52 #define DUMP_APPEL  4
53 #define DUMP_ALL    2 * DUMP_APPEL - 1
54
55 #define COST_FUNC_FREQ     1
56 #define COST_FUNC_LOOP     2
57 #define COST_FUNC_ALL_ONE  3
58
59 static int dump_flags         = 0;
60 static int style_flags        = 0;
61 static int do_stats           = 0;
62 static cost_fct_t cost_func   = co_get_costs_exec_freq;
63 static int algo               = CO_ALGO_HEUR2;
64 static int improve            = 1;
65
66 #ifdef WITH_LIBCORE
67 static const lc_opt_enum_mask_items_t dump_items[] = {
68         { "before",  DUMP_BEFORE },
69         { "after",   DUMP_AFTER  },
70         { "appel",   DUMP_APPEL  },
71         { "all",     DUMP_ALL    },
72         { NULL,      0 }
73 };
74
75 static const lc_opt_enum_mask_items_t style_items[] = {
76         { "color",   CO_IFG_DUMP_COLORS },
77         { "labels",  CO_IFG_DUMP_LABELS },
78         { "constr",  CO_IFG_DUMP_CONSTR },
79         { "shape",   CO_IFG_DUMP_SHAPE  },
80         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
81         { NULL,      0 }
82 };
83
84 static const lc_opt_enum_mask_items_t algo_items[] = {
85         { "none",   CO_ALGO_NONE  },
86         { "heur",   CO_ALGO_HEUR  },
87         { "heur2",  CO_ALGO_HEUR2 },
88         { "heur3",  CO_ALGO_HEUR3 },
89         { "ilp",    CO_ALGO_ILP   },
90         { NULL,     0 }
91 };
92
93 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
94         { "freq",   co_get_costs_exec_freq },
95         { "loop",   co_get_costs_loop_depth },
96         { "one",    co_get_costs_all_one },
97         { NULL,     0 }
98 };
99
100 static lc_opt_enum_mask_var_t dump_var = {
101         &dump_flags, dump_items
102 };
103
104 static lc_opt_enum_mask_var_t style_var = {
105         &style_flags, style_items
106 };
107
108 static lc_opt_enum_mask_var_t algo_var = {
109         &algo, algo_items
110 };
111
112 static lc_opt_enum_func_ptr_var_t cost_func_var = {
113         &cost_func, cost_func_items
114 };
115
116 static const lc_opt_table_entry_t options[] = {
117         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo (heur, heur2, heur3, ilp)", &algo_var),
118         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function (freq, loop, one)",                &cost_func_var),
119         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
120         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
121         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
122         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
123         { NULL }
124 };
125
126 /* Insert additional options registration functions here. */
127 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
128 extern void be_co2_register_options(lc_opt_entry_t *grp);
129 extern void be_co3_register_options(lc_opt_entry_t *grp);
130
131 void co_register_options(lc_opt_entry_t *grp)
132 {
133         lc_opt_entry_t *co_grp = lc_opt_get_grp(grp, "co");
134         lc_opt_add_table(co_grp, options);
135
136         be_co2_register_options(co_grp);
137         be_co3_register_options(co_grp);
138 #ifdef WITH_ILP
139         be_co_ilp_register_options(co_grp);
140 #endif
141 }
142 #endif
143
144
145 #undef QUICK_AND_DIRTY_HACK
146
147 /******************************************************************************
148     _____                           _
149    / ____|                         | |
150   | |  __  ___ _ __   ___ _ __ __ _| |
151   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
152   | |__| |  __/ | | |  __/ | | (_| | |
153    \_____|\___|_| |_|\___|_|  \__,_|_|
154
155  ******************************************************************************/
156
157 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
158
159 void be_copy_opt_init(void) {
160         if(algo == CO_ALGO_HEUR3)
161                 be_java_coal_start_jvm();
162 }
163
164 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
165 {
166         const char *s1, *s2, *s3;
167         int len;
168         copy_opt_t *co;
169
170         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
171
172         co = xcalloc(1, sizeof(*co));
173         co->cenv      = chordal_env;
174         co->aenv      = chordal_env->birg->main_env->arch_env;
175         co->irg       = chordal_env->irg;
176         co->cls       = chordal_env->cls;
177         co->get_costs = get_costs;
178
179         s1 = get_irp_prog_name();
180         s2 = get_entity_name(get_irg_entity(co->irg));
181         s3 = chordal_env->cls->name;
182         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
183         co->name = xmalloc(len);
184         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
185
186         return co;
187 }
188
189 void free_copy_opt(copy_opt_t *co) {
190         xfree(co->name);
191         free(co);
192 }
193
194 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
195         arch_register_req_t req;
196         const arch_register_t *reg;
197
198         if (arch_irn_is(co->aenv, irn, ignore))
199                 return 0;
200
201         reg = arch_get_irn_register(co->aenv, irn);
202         if (arch_register_type_is(reg, ignore))
203                 return 0;
204
205         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req))
206                 return 1;
207
208         return 0;
209 }
210
211 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
212         const ir_edge_t *edge;
213         const arch_register_t *reg;
214
215         assert(0 && "Is buggy and obsolete. Do not use");
216
217         if (arch_irn_is(co->aenv, irn, ignore))
218                 return 0;
219
220         reg = arch_get_irn_register(co->aenv, irn);
221         if (arch_register_type_is(reg, ignore))
222                 return 0;
223
224         foreach_out_edge(irn, edge) {
225                 ir_node *n = edge->src;
226
227                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
228                         arch_register_req_t req;
229                         arch_get_register_req(co->aenv, &req, n, -1);
230
231                         if(is_Reg_Phi(n) ||
232                            is_Perm(co->aenv, n) ||
233                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
234                           )
235                                 return 1;
236                 }
237         }
238
239         return 0;
240 }
241
242 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
243         int cost = 0;
244         ir_loop *loop;
245         ir_node *root_block = get_nodes_block(root);
246
247         if (is_Phi(root)) {
248                 /* for phis the copies are placed in the corresponding pred-block */
249                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
250         } else {
251                 /* a perm places the copy in the same block as it resides */
252                 loop = get_irn_loop(root_block);
253         }
254         if (loop) {
255                 int d = get_loop_depth(loop);
256                 cost = d*d;
257         }
258         return 1+cost;
259 }
260
261 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
262         int res;
263         ir_node *root_bl = get_nodes_block(root);
264         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
265         res = get_block_execfreq_ulong(co->cenv->exec_freq, copy_bl);
266
267         /* don't allow values smaller than one. */
268         return res < 1 ? 1 : res;
269 }
270
271
272 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
273         return 1;
274 }
275
276 /******************************************************************************
277    ____        _   _    _       _ _          _____ _
278   / __ \      | | | |  | |     (_) |        / ____| |
279  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
280  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
281  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
282   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
283         | |                                                            __/ |
284         |_|                                                           |___/
285  ******************************************************************************/
286
287 /**
288  * Determines a maximum weighted independent set with respect to
289  * the interference and conflict edges of all nodes in a qnode.
290  */
291 static int ou_max_ind_set_costs(unit_t *ou) {
292         be_chordal_env_t *chordal_env = ou->co->cenv;
293         ir_node **safe, **unsafe;
294         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
295         bitset_t *curr;
296         int max, pos, curr_weight, best_weight = 0;
297
298         /* assign the nodes into two groups.
299          * safe: node has no interference, hence it is in every max stable set.
300          * unsafe: node has an interference
301          */
302         safe = alloca((ou->node_count-1) * sizeof(*safe));
303         safe_costs = 0;
304         safe_count = 0;
305         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
306         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
307         unsafe_count = 0;
308         for(i=1; i<ou->node_count; ++i) {
309                 int is_safe = 1;
310                 for(o=1; o<ou->node_count; ++o) {
311                         if (i==o)
312                                 continue;
313                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
314                                 unsafe_costs[unsafe_count] = ou->costs[i];
315                                 unsafe[unsafe_count] = ou->nodes[i];
316                                 ++unsafe_count;
317                                 is_safe = 0;
318                                 break;
319                         }
320                 }
321                 if (is_safe) {
322                         safe_costs += ou->costs[i];
323                         safe[safe_count++] = ou->nodes[i];
324                 }
325         }
326
327
328         /* now compute the best set out of the unsafe nodes*/
329         if (unsafe_count > MIS_HEUR_TRIGGER) {
330                 bitset_t *best = bitset_alloca(unsafe_count);
331                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
332                 for (i=0; i<unsafe_count; ++i) {
333                         bitset_set(best, i);
334                         /* check if it is a stable set */
335                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
336                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
337                                         bitset_clear(best, i); /* clear the bit and try next one */
338                                         break;
339                                 }
340                 }
341                 /* compute the weight */
342                 bitset_foreach(best, pos)
343                         best_weight += unsafe_costs[pos];
344         } else {
345                 /* Exact Algorithm: Brute force */
346                 curr = bitset_alloca(unsafe_count);
347                 bitset_set_all(curr);
348                 while ((max = bitset_popcnt(curr)) != 0) {
349                         /* check if curr is a stable set */
350                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
351                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
352                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
353                                                         goto no_stable_set;
354
355                         /* if we arrive here, we have a stable set */
356                         /* compute the weigth of the stable set*/
357                         curr_weight = 0;
358                         bitset_foreach(curr, pos)
359                                 curr_weight += unsafe_costs[pos];
360
361                         /* any better ? */
362                         if (curr_weight > best_weight) {
363                                 best_weight = curr_weight;
364                         }
365
366         no_stable_set:
367                         bitset_minus1(curr);
368                 }
369         }
370
371         return safe_costs+best_weight;
372 }
373
374 static void co_collect_units(ir_node *irn, void *env) {
375         copy_opt_t *co = env;
376         unit_t *unit;
377         arch_register_req_t req;
378
379         if (!is_curr_reg_class(co, irn))
380                 return;
381         if (!co_is_optimizable_root(co, irn))
382                 return;
383
384         /* Init a new unit */
385         unit = xcalloc(1, sizeof(*unit));
386         unit->co = co;
387         unit->node_count = 1;
388         INIT_LIST_HEAD(&unit->queue);
389
390         /* Phi with some/all of its arguments */
391         if (is_Reg_Phi(irn)) {
392                 int i, arity;
393
394                 /* init */
395                 arity = get_irn_arity(irn);
396                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
397                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
398                 unit->nodes[0] = irn;
399
400                 /* fill */
401                 for (i=0; i<arity; ++i) {
402                         int o, arg_pos;
403                         ir_node *arg = get_irn_n(irn, i);
404
405                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
406                         if (arg == irn)
407                                 continue;
408                         if (nodes_interfere(co->cenv, irn, arg)) {
409                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
410                                 continue;
411                         }
412
413                         /* Else insert the argument of the phi to the members of this ou */
414                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
415
416                         /* Check if arg has occurred at a prior position in the arg/list */
417                         arg_pos = 0;
418                         for (o=0; o<unit->node_count; ++o)
419                                 if (unit->nodes[o] == arg) {
420                                         arg_pos = o;
421                                         break;
422                                 }
423
424                         if (!arg_pos) { /* a new argument */
425                                 /* insert node, set costs */
426                                 unit->nodes[unit->node_count] = arg;
427                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
428                                 unit->node_count++;
429                         } else { /* arg has occured before in same phi */
430                                 /* increase costs for existing arg */
431                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
432                         }
433                 }
434                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
435                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
436         } else
437
438         /* Proj of a perm with corresponding arg */
439         if (is_Perm_Proj(co->aenv, irn)) {
440                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
441                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
442                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
443                 unit->node_count = 2;
444                 unit->nodes[0] = irn;
445                 unit->nodes[1] = get_Perm_src(irn);
446                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
447         } else
448
449         /* Src == Tgt of a 2-addr-code instruction */
450         if (is_2addr_code(co->aenv, irn, &req)) {
451                 ir_node *other = req.other_same;
452                 if (!nodes_interfere(co->cenv, irn, other)) {
453                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
454                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
455                         unit->node_count = 2;
456                         unit->nodes[0] = irn;
457                         unit->nodes[1] = other;
458                         unit->costs[1] = co->get_costs(co, irn, other, -1);
459                 }
460         } else
461                 assert(0 && "This is not an optimizable node!");
462
463         /* Insert the new unit at a position according to its costs */
464         if (unit->node_count > 1) {
465                 int i;
466                 struct list_head *tmp;
467
468                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
469                 for(i=1; i<unit->node_count; ++i) {
470                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
471                         unit->all_nodes_costs += unit->costs[i];
472                 }
473
474                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
475                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
476                 /* Insert the new ou according to its sort_key */
477                 tmp = &co->units;
478                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
479                         tmp = tmp->next;
480                 list_add(&unit->units, tmp);
481         } else {
482                 free(unit);
483         }
484 }
485
486 #ifdef QUICK_AND_DIRTY_HACK
487
488 static int compare_ous(const void *k1, const void *k2) {
489         const unit_t *u1 = *((const unit_t **) k1);
490         const unit_t *u2 = *((const unit_t **) k2);
491         int i, o, u1_has_constr, u2_has_constr;
492         arch_register_req_t req;
493         const arch_env_t *aenv = u1->co->aenv;
494
495         /* Units with constraints come first */
496         u1_has_constr = 0;
497         for (i=0; i<u1->node_count; ++i) {
498                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
499                 if (arch_register_req_is(&req, limited)) {
500                         u1_has_constr = 1;
501                         break;
502                 }
503         }
504
505         u2_has_constr = 0;
506         for (i=0; i<u2->node_count; ++i) {
507                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
508                 if (arch_register_req_is(&req, limited)) {
509                         u2_has_constr = 1;
510                         break;
511                 }
512         }
513
514         if (u1_has_constr != u2_has_constr)
515                 return u2_has_constr - u1_has_constr;
516
517         /* Now check, whether the two units are connected */
518 #if 0
519         for (i=0; i<u1->node_count; ++i)
520                 for (o=0; o<u2->node_count; ++o)
521                         if (u1->nodes[i] == u2->nodes[o])
522                                 return 0;
523 #endif
524
525         /* After all, the sort key decides. Greater keys come first. */
526         return u2->sort_key - u1->sort_key;
527
528 }
529
530 /**
531  * Sort the ou's according to constraints and their sort_key
532  */
533 static void co_sort_units(copy_opt_t *co) {
534         int i, count = 0, costs;
535         unit_t *ou, **ous;
536
537         /* get the number of ous, remove them form the list and fill the array */
538         list_for_each_entry(unit_t, ou, &co->units, units)
539                 count++;
540         ous = alloca(count * sizeof(*ous));
541
542         costs = co_get_max_copy_costs(co);
543
544         i = 0;
545         list_for_each_entry(unit_t, ou, &co->units, units)
546                 ous[i++] = ou;
547
548         INIT_LIST_HEAD(&co->units);
549
550         assert(count == i && list_empty(&co->units));
551
552         for (i=0; i<count; ++i)
553                 ir_printf("%+F\n", ous[i]->nodes[0]);
554
555         qsort(ous, count, sizeof(*ous), compare_ous);
556
557         ir_printf("\n\n");
558         for (i=0; i<count; ++i)
559                 ir_printf("%+F\n", ous[i]->nodes[0]);
560
561         /* reinsert into list in correct order */
562         for (i=0; i<count; ++i)
563                 list_add_tail(&ous[i]->units, &co->units);
564
565         assert(costs == co_get_max_copy_costs(co));
566 }
567 #endif
568
569 void co_build_ou_structure(copy_opt_t *co) {
570         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
571         INIT_LIST_HEAD(&co->units);
572         irg_walk_graph(co->irg, co_collect_units, NULL, co);
573 #ifdef QUICK_AND_DIRTY_HACK
574         co_sort_units(co);
575 #endif
576 }
577
578 void co_free_ou_structure(copy_opt_t *co) {
579         unit_t *curr, *tmp;
580         ASSERT_OU_AVAIL(co);
581         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
582                 xfree(curr->nodes);
583                 xfree(curr->costs);
584                 xfree(curr);
585         }
586         co->units.next = NULL;
587 }
588
589 /* co_solve_heuristic() is implemented in becopyheur.c */
590
591 int co_get_max_copy_costs(const copy_opt_t *co) {
592         int i, res = 0;
593         unit_t *curr;
594
595         ASSERT_OU_AVAIL(co);
596
597         list_for_each_entry(unit_t, curr, &co->units, units) {
598                 res += curr->inevitable_costs;
599                 for (i=1; i<curr->node_count; ++i)
600                         res += curr->costs[i];
601         }
602         return res;
603 }
604
605 int co_get_inevit_copy_costs(const copy_opt_t *co) {
606         int res = 0;
607         unit_t *curr;
608
609         ASSERT_OU_AVAIL(co);
610
611         list_for_each_entry(unit_t, curr, &co->units, units)
612                 res += curr->inevitable_costs;
613         return res;
614 }
615
616 int co_get_copy_costs(const copy_opt_t *co) {
617         int i, res = 0;
618         unit_t *curr;
619
620         ASSERT_OU_AVAIL(co);
621
622         list_for_each_entry(unit_t, curr, &co->units, units) {
623                 int root_col = get_irn_col(co, curr->nodes[0]);
624                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
625                 res += curr->inevitable_costs;
626                 for (i=1; i<curr->node_count; ++i) {
627                         int arg_col = get_irn_col(co, curr->nodes[i]);
628                         if (root_col != arg_col) {
629                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
630                                 res += curr->costs[i];
631                         }
632                 }
633         }
634         return res;
635 }
636
637 int co_get_lower_bound(const copy_opt_t *co) {
638         int res = 0;
639         unit_t *curr;
640
641         ASSERT_OU_AVAIL(co);
642
643         list_for_each_entry(unit_t, curr, &co->units, units)
644                 res += curr->inevitable_costs + curr->min_nodes_costs;
645         return res;
646 }
647
648 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
649 {
650         bitset_t *seen = bitset_irg_malloc(co->irg);
651         affinity_node_t *an;
652
653         memset(stat, 0, sizeof(stat[0]));
654
655         /* count affinity edges. */
656         co_gs_foreach_aff_node(co, an) {
657                 neighb_t *neigh;
658                 stat->aff_nodes += 1;
659                 bitset_add_irn(seen, an->irn);
660                 co_gs_foreach_neighb(an, neigh) {
661                         if(!bitset_contains_irn(seen, neigh->irn)) {
662                                 stat->aff_edges += 1;
663                                 stat->max_costs += neigh->costs;
664
665                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
666                                         stat->costs += neigh->costs;
667                                         stat->unsatisfied_edges += 1;
668                                 }
669
670                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
671                                         stat->aff_int += 1;
672                                         stat->inevit_costs += neigh->costs;
673                                 }
674
675                         }
676                 }
677         }
678
679         bitset_free(seen);
680 }
681
682 /******************************************************************************
683    _____                 _        _____ _
684   / ____|               | |      / ____| |
685  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
686  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
687  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
688   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
689                   | |                                       __/ |
690                   |_|                                      |___/
691  ******************************************************************************/
692
693 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
694         const affinity_node_t *n1 = k1;
695         const affinity_node_t *n2 = k2;
696
697         return (n1->irn != n2->irn);
698 }
699
700 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
701         affinity_node_t new_node, *node;
702         neighb_t new_nbr, *nbr;
703         int allocnew;
704
705         new_node.irn        = n1;
706         new_node.degree     = 0;
707         new_node.neighbours = NULL;
708         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
709
710         allocnew = 1;
711         for (nbr = node->neighbours; nbr; nbr = nbr->next)
712                 if (nbr->irn == n2) {
713                         allocnew = 0;
714                         break;
715                 }
716
717         /* if we did not find n2 in n1's neighbourhood insert it */
718         if (allocnew) {
719                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
720                 nbr = obstack_finish(&co->obst);
721                 nbr->irn   = n2;
722                 nbr->costs = 0;
723                 nbr->next  = node->neighbours;
724                 node->neighbours = nbr;
725                 node->degree++;
726         }
727
728         /* now nbr points to n1's neighbour-entry of n2 */
729         nbr->costs += costs;
730 }
731
732 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
733         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
734                 add_edge(co, n1, n2, costs);
735                 add_edge(co, n2, n1, costs);
736         }
737 }
738
739 static void build_graph_walker(ir_node *irn, void *env) {
740         copy_opt_t *co = env;
741         int pos, max;
742         arch_register_req_t req;
743         const arch_register_t *reg;
744
745         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
746                 return;
747
748         reg = arch_get_irn_register(co->aenv, irn);
749         if (arch_register_type_is(reg, ignore))
750                 return;
751
752         /* Phis */
753         if (is_Reg_Phi(irn))
754                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
755                         ir_node *arg = get_irn_n(irn, pos);
756                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
757                 }
758
759         /* Perms */
760         else if (is_Perm_Proj(co->aenv, irn)) {
761                 ir_node *arg = get_Perm_src(irn);
762                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
763         }
764
765         /* 2-address code */
766         else if (is_2addr_code(co->aenv, irn, &req))
767                 add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0));
768 }
769
770 void co_build_graph_structure(copy_opt_t *co) {
771         obstack_init(&co->obst);
772         co->nodes = new_set(compare_affinity_node_t, 32);
773
774         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
775 }
776
777 void co_free_graph_structure(copy_opt_t *co) {
778         ASSERT_GS_AVAIL(co);
779
780         del_set(co->nodes);
781         obstack_free(&co->obst, NULL);
782         co->nodes = NULL;
783 }
784
785 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
786
787 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
788         affinity_node_t new_node, *n;
789
790         ASSERT_GS_AVAIL(co);
791
792         new_node.irn = irn;
793         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
794         if (n) {
795                 return (n->degree > 0);
796         } else
797                 return 0;
798 }
799
800 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
801 {
802         be_ifg_t *ifg  = co->cenv->ifg;
803         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
804         bitset_t *adm  = bitset_alloca(co->cls->n_regs);
805
806         ir_node *irn;
807         void *it, *nit;
808         int i, n, n_regs;
809
810         n_regs = 0;
811         for(i = 0; i < co->cls->n_regs; ++i) {
812                 const arch_register_t *reg = &co->cls->regs[i];
813                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
814         }
815
816         /*
817          * n contains the first node number.
818          * the values below n are the pre-colored register nodes
819          */
820
821         it  = be_ifg_nodes_iter_alloca(ifg);
822         nit = be_ifg_neighbours_iter_alloca(ifg);
823
824         n = n_regs;
825         be_ifg_foreach_node(ifg, it, irn) {
826                 if(!arch_irn_is(co->aenv, irn, ignore))
827                         set_irn_link(irn, INT_TO_PTR(n++));
828         }
829
830         fprintf(f, "%d %d\n", n, n_regs);
831
832         be_ifg_foreach_node(ifg, it, irn) {
833                 if(!arch_irn_is(co->aenv, irn, ignore)) {
834                         int idx            = PTR_TO_INT(get_irn_link(irn));
835                         affinity_node_t *a = get_affinity_info(co, irn);
836
837                         arch_register_req_t req;
838                         ir_node *adj;
839
840                         arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0));
841                         if(arch_register_req_is(&req, limited)) {
842                                 bitset_clear_all(adm);
843                                 req.limited(req.limited_env, adm);
844                                 for(i = 0; i < co->cls->n_regs; ++i)
845                                         if(!bitset_is_set(adm, i) && color_map[i] >= 0)
846                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
847
848                         }
849
850
851                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
852                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
853                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
854                                         if(idx < adj_idx)
855                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
856                                 }
857                         }
858
859                         if(a) {
860                                 neighb_t *n;
861
862                                 co_gs_foreach_neighb(a, n) {
863                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
864                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
865                                                 if(idx < n_idx)
866                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
867                                         }
868                                 }
869                         }
870                 }
871         }
872 }
873
874 typedef struct _appel_clique_walker_t {
875         phase_t ph;
876         const copy_opt_t *co;
877         int curr_nr;
878         int node_count;
879         FILE *f;
880         int dumb;
881         int *color_map;
882         struct obstack obst;
883 } appel_clique_walker_t;
884
885 typedef struct _appel_block_info_t {
886         int *live_end_nr;
887         int *live_in_nr;
888         int *phi_nr;
889         ir_node **live_end;
890         ir_node **live_in;
891         ir_node **phi;
892         int n_live_end;
893         int n_live_in;
894         int n_phi;
895 } appel_block_info_t;
896
897 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
898 {
899 #if 0
900         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
901         int res = (int) freq;
902         return res == 0 ? 1 : res;
903 #else
904         ir_loop *loop = get_irn_loop(bl);
905         if(loop) {
906                 int d = get_loop_depth(loop);
907                 return 1 + d * d;
908         }
909         return 1;
910 #endif
911 }
912
913 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
914 {
915         appel_block_info_t *res = NULL;
916
917         if(is_Block(irn)) {
918                 appel_clique_walker_t *d = (void *) phase;
919                 res = phase_alloc(phase, sizeof(res[0]));
920                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
921                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
922                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
923                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
924                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
925                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
926         }
927
928         return res;
929 }
930
931 typedef struct _insn_list_t {
932         be_insn_t *insn;
933         struct list_head list;
934 } insn_list_t;
935
936 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
937 {
938         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
939         int i;
940
941         for(i = 0; i < bli->n_live_end; ++i)
942                 if(bli->live_end[i] == irn)
943                         return bli->live_end_nr[i];
944
945         return -1;
946 }
947
948 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
949 {
950         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
951         ir_node *irn;
952         int n_live;
953         int j;
954
955         n_live = 0;
956         foreach_pset(live, irn)
957                 live_arr[n_live++] = irn;
958
959         /* dump the live after clique */
960         if(!env->dumb) {
961                 for(j = 0; j < n_live; ++j) {
962                         int k;
963
964                         for(k = j + 1; k < n_live; ++k) {
965                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
966                         }
967                         fprintf(env->f, "\n");
968                 }
969         }
970
971         /* dump the affinities */
972         for(j = 0; !env->dumb && j < n_live; ++j) {
973                 ir_node *irn = live_arr[j];
974                 int old_nr = PTR_TO_INT(get_irn_link(irn));
975
976                 /* if the node was already live in the last insn dump the affinity */
977                 if(old_nr > start_nr) {
978                         int weight = appel_aff_weight(env, bl);
979                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
980                 }
981         }
982
983         /* set the current numbers into the link field. */
984         for(j = 0; j < n_live; ++j) {
985                 ir_node *irn = live_arr[j];
986                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
987         }
988
989         return curr_nr + n_live;
990 }
991
992 static void appel_walker(ir_node *bl, void *data)
993 {
994         appel_clique_walker_t *env = data;
995         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
996         struct obstack *obst       = &env->obst;
997         void *base                 = obstack_base(obst);
998         pset *live                 = pset_new_ptr_default();
999
1000         int n_insns  = 0;
1001         int n_nodes  = 0;
1002         int start_nr = env->curr_nr;
1003         int curr_nr  = start_nr;
1004
1005         be_insn_env_t insn_env;
1006         int i, j;
1007         ir_node *irn;
1008         be_insn_t **insns;
1009
1010         insn_env.aenv = env->co->aenv;
1011         insn_env.cls  = env->co->cls;
1012         insn_env.obst = obst;
1013         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1014
1015         /* Guess how many insns will be in this block. */
1016         sched_foreach(bl, irn)
1017                 n_nodes++;
1018
1019         bli->n_phi = 0;
1020         insns = malloc(n_nodes * sizeof(insns[0]));
1021
1022         /* Put all insns in an array. */
1023         irn = sched_first(bl);
1024         while(!sched_is_end(irn)) {
1025                 be_insn_t *insn;
1026                 insn = be_scan_insn(&insn_env, irn);
1027                 insns[n_insns++] = insn;
1028                 irn = insn->next_insn;
1029         }
1030
1031         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
1032         be_liveness_end_of_block(env->co->cenv->lv, env->co->aenv, env->co->cls, bl, live);
1033
1034         /* Generate the bad and ugly. */
1035         for(i = n_insns - 1; i >= 0; --i) {
1036                 be_insn_t *insn = insns[i];
1037
1038                 /* The first live set has to be saved in the block border set. */
1039                 if(i == n_insns - 1) {
1040                         j = 0;
1041                         foreach_pset(live, irn) {
1042                                 bli->live_end[j]    = irn;
1043                                 bli->live_end_nr[j] = curr_nr + j;
1044                                 ++j;
1045                         }
1046                         bli->n_live_end = j;
1047                 }
1048
1049                 if(!env->dumb) {
1050                         for(j = 0; j < insn->use_start; ++j) {
1051                                 ir_node *op   = insn->ops[j].carrier;
1052                                 bitset_t *adm = insn->ops[j].regs;
1053                                 int k;
1054                                 int nr;
1055
1056                                 if(!insn->ops[j].has_constraints)
1057                                         continue;
1058
1059                                 nr = 0;
1060                                 foreach_pset(live, irn) {
1061                                         if(irn == op) {
1062                                                 pset_break(live);
1063                                                 break;
1064                                         }
1065                                         ++nr;
1066                                 }
1067
1068                                 assert(nr < pset_count(live));
1069
1070                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1071                                         int mapped_col = env->color_map[k];
1072                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1073                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1074                                 }
1075                         }
1076                 }
1077
1078                 /* dump the clique and update the stuff. */
1079                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1080
1081                 /* remove all defs. */
1082                 for(j = 0; j < insn->use_start; ++j)
1083                         pset_remove_ptr(live, insn->ops[j].carrier);
1084
1085                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1086                         bli->phi[bli->n_phi]    = insn->irn;
1087                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1088                         bli->n_phi++;
1089                 }
1090
1091                 /* add all uses */
1092                 else
1093                         for(j = insn->use_start; j < insn->n_ops; ++j)
1094                                 pset_insert_ptr(live, insn->ops[j].carrier);
1095         }
1096
1097         /* print the start clique. */
1098         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1099
1100         i = 0;
1101         foreach_pset(live, irn) {
1102                 bli->live_in[i]    = irn;
1103                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1104                 ++i;
1105         }
1106         bli->n_live_in = i;
1107
1108         del_pset(live);
1109         free(insns);
1110         obstack_free(obst, base);
1111         env->curr_nr = curr_nr;
1112 }
1113
1114 static void appel_inter_block_aff(ir_node *bl, void *data)
1115 {
1116         appel_clique_walker_t *env = data;
1117         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1118
1119         int i, j, n;
1120
1121         for(i = 0; i < bli->n_live_in; ++i) {
1122                 ir_node *irn = bli->live_in[i];
1123
1124                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1125                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1126
1127                         int nr = appel_get_live_end_nr(env, pred, irn);
1128                         assert(nr >= 0);
1129                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1130                 }
1131         }
1132
1133         for(i = 0; i < bli->n_phi; ++i) {
1134                 ir_node *irn = bli->phi[i];
1135
1136                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1137                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1138                         ir_node *op = get_irn_n(irn, j);
1139
1140                         int nr = appel_get_live_end_nr(env, pred, op);
1141                         assert(nr >= 0);
1142                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1143                 }
1144         }
1145
1146 }
1147
1148 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1149 {
1150         int i;
1151         int n_colors;
1152         appel_clique_walker_t env;
1153         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1154
1155         be_liveness_recompute(co->cenv->lv);
1156         obstack_init(&env.obst);
1157         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1158         env.curr_nr = co->cls->n_regs;
1159         env.co = co;
1160         env.f = f;
1161
1162         bitset_copy(adm, co->cenv->ignore_colors);
1163         bitset_flip_all(adm);
1164
1165         /* Make color map. */
1166         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1167         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1168                 const arch_register_t *reg = &co->cls->regs[i];
1169                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1170         }
1171
1172         env.dumb = 1;
1173         env.curr_nr = n_colors;
1174         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1175         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1176
1177         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1178
1179         /* make the first k nodes interfere */
1180         for(i = 0; i < n_colors; ++i) {
1181                 int j;
1182                 for(j = i + 1; j < n_colors; ++j)
1183                         fprintf(f, "%d %d -1 ", i, j);
1184                 fprintf(f, "\n");
1185         }
1186
1187         env.dumb = 0;
1188         env.curr_nr = n_colors;
1189         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1190         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1191         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1192         obstack_free(&env.obst, NULL);
1193 }
1194
1195 /*
1196          ___ _____ ____   ____   ___ _____   ____                        _
1197         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1198          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1199          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1200         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1201                                                                   |_|            |___/
1202 */
1203
1204 static const char *get_dot_color_name(int col)
1205 {
1206         static const char *names[] = {
1207                 "blue",
1208                 "red",
1209                 "green",
1210                 "yellow",
1211                 "cyan",
1212                 "magenta",
1213                 "orange",
1214                 "chocolate",
1215                 "beige",
1216                 "navy",
1217                 "darkgreen",
1218                 "darkred",
1219                 "lightPink",
1220                 "chartreuse",
1221                 "lightskyblue",
1222                 "linen",
1223                 "pink",
1224                 "lightslateblue",
1225                 "mintcream",
1226                 "red",
1227                 "darkolivegreen",
1228                 "mediumblue",
1229                 "mistyrose",
1230                 "salmon",
1231                 "darkseagreen",
1232                 "mediumslateblue"
1233                 "moccasin",
1234                 "tomato",
1235                 "forestgreen",
1236                 "darkturquoise",
1237                 "palevioletred"
1238         };
1239
1240         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1241 }
1242
1243 typedef struct _co_ifg_dump_t {
1244         const copy_opt_t *co;
1245         unsigned flags;
1246 } co_ifg_dump_t;
1247
1248 static void ifg_dump_graph_attr(FILE *f, void *self)
1249 {
1250         fprintf(f, "overlap=scale");
1251 }
1252
1253 static int ifg_is_dump_node(void *self, ir_node *irn)
1254 {
1255         co_ifg_dump_t *cod = self;
1256         return !arch_irn_is(cod->co->aenv, irn, ignore);
1257 }
1258
1259 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1260 {
1261         co_ifg_dump_t *env         = self;
1262         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1263         arch_register_req_t req;
1264         int limited;
1265
1266         arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0));
1267         limited = arch_register_req_is(&req, limited);
1268
1269         if(env->flags & CO_IFG_DUMP_LABELS) {
1270                 ir_fprintf(f, "label=\"%+F", irn);
1271
1272                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1273                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1274                         req.limited(req.limited_env, bs);
1275                         ir_fprintf(f, "\\n%B", bs);
1276                 }
1277                 ir_fprintf(f, "\" ");
1278         }
1279
1280         else
1281                 fprintf(f, "label=\"\" shape=point " );
1282
1283         if(env->flags & CO_IFG_DUMP_SHAPE)
1284                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1285
1286         if(env->flags & CO_IFG_DUMP_COLORS)
1287                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1288 }
1289
1290 static void ifg_dump_at_end(FILE *file, void *self)
1291 {
1292         co_ifg_dump_t *env = self;
1293         affinity_node_t *a;
1294
1295         co_gs_foreach_aff_node(env->co, a) {
1296                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1297                 unsigned aidx = get_irn_idx(a->irn);
1298                 neighb_t *n;
1299
1300                 co_gs_foreach_neighb(a, n) {
1301                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1302                         unsigned nidx = get_irn_idx(n->irn);
1303
1304                         if(aidx < nidx) {
1305                                 const char *color = nr == ar ? "blue" : "red";
1306                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1307                                 if(env->flags & CO_IFG_DUMP_LABELS)
1308                                         fprintf(file, "label=\"%d\" ", n->costs);
1309                                 if(env->flags & CO_IFG_DUMP_COLORS)
1310                                         fprintf(file, "color=%s ", color);
1311                                 else
1312                                         fprintf(file, "style=dotted");
1313                                 fprintf(file, "];\n");
1314                         }
1315                 }
1316         }
1317 }
1318
1319
1320 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1321         ifg_is_dump_node,
1322         ifg_dump_graph_attr,
1323         ifg_dump_node_attr,
1324         NULL,
1325         NULL,
1326         ifg_dump_at_end
1327 };
1328
1329
1330
1331 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1332 {
1333         co_ifg_dump_t cod;
1334
1335         cod.co    = co;
1336         cod.flags = flags;
1337         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1338 }
1339
1340
1341 void co_solve_park_moon(copy_opt_t *opt)
1342 {
1343
1344 }
1345
1346 static int void_algo(copy_opt_t *co)
1347 {
1348         return 0;
1349 }
1350
1351 /*
1352                 _    _                  _ _   _
1353            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1354           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1355          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1356         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1357                            |___/
1358 */
1359
1360 typedef struct {
1361         co_algo_t *algo;
1362         const char *name;
1363         int can_improve_existing;
1364 } co_algo_info_t;
1365
1366 static co_algo_info_t algos[] = {
1367         { void_algo,                "none",  0 },
1368         { co_solve_heuristic,       "heur1", 0 },
1369         { co_solve_heuristic_new,   "heur2", 0 },
1370         { co_solve_heuristic_java,  "heur3", 0 },
1371 #ifdef WITH_ILP
1372         { co_solve_ilp2,            "ilp",   1 },
1373 #endif
1374         { NULL,                     "",      0 }
1375 };
1376
1377 /*
1378     __  __       _         ____       _
1379    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1380    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1381    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1382    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1383
1384 */
1385
1386 void co_driver(be_chordal_env_t *cenv)
1387 {
1388 #ifdef WITH_LIBCORE
1389         lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
1390 #endif
1391         co_complete_stats_t before, after;
1392         copy_opt_t *co;
1393         co_algo_t  *algo_func;
1394         int was_optimal = 0;
1395
1396         if(algo < 0 || algo >= CO_ALGO_LAST)
1397                 return;
1398
1399         co = new_copy_opt(cenv, cost_func);
1400         co_build_ou_structure(co);
1401         co_build_graph_structure(co);
1402
1403         co_complete_stats(co, &before);
1404
1405         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1406         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1407         be_stat_ev_ull("co_max_costs",    before.max_costs);
1408         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1409         be_stat_ev_ull("co_aff_int",      before.aff_int);
1410
1411         be_stat_ev_ull("co_init_costs",   before.costs);
1412         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1413
1414         /* Dump the interference graph in Appel's format. */
1415         if(dump_flags & DUMP_APPEL) {
1416                 FILE *f = be_chordal_open(cenv, "", ".apl");
1417                 co_dump_appel_graph(co, f);
1418                 fclose(f);
1419         }
1420
1421         if(dump_flags & DUMP_BEFORE) {
1422                 FILE *f = be_chordal_open(cenv, "", "-before.dot");
1423                 co_dump_ifg_dot(co, f, style_flags);
1424                 fclose(f);
1425         }
1426
1427         /* if the algo can improve results, provide an initial solution with heur3 */
1428         if(improve && algos[algo].can_improve_existing) {
1429                 co_complete_stats_t stats;
1430
1431                 /* produce a heuristical solution */
1432                 co_solve_heuristic_java(co);
1433
1434                 /* do the stats and provide the current costs */
1435                 co_complete_stats(co, &stats);
1436                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1437         }
1438
1439         algo_func = algos[algo].algo;
1440
1441 #ifdef WITH_LIBCORE
1442         lc_timer_reset_and_start(timer);
1443 #endif
1444
1445         was_optimal = algo_func(co);
1446
1447 #ifdef WITH_LIBCORE
1448         lc_timer_stop(timer);
1449         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1450 #endif
1451
1452         be_stat_ev_ull("co_optimal", was_optimal);
1453
1454         if(dump_flags & DUMP_AFTER) {
1455                 FILE *f = be_chordal_open(cenv, "", "-after.dot");
1456                 co_dump_ifg_dot(co, f, style_flags);
1457                 fclose(f);
1458         }
1459
1460         co_complete_stats(co, &after);
1461
1462         if(do_stats) {
1463                 int optimizable_costs = after.max_costs - after.inevit_costs;
1464                 int evitable          = after.costs     - after.inevit_costs;
1465
1466                 ir_printf("%30F ", cenv->irg);
1467                 printf("%10s %10llu%10llu%10llu", cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1468
1469                 if(optimizable_costs > 0)
1470                         printf("%10llu %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1471                 else
1472                         printf("%10llu %5s\n", after.costs, "-");
1473         }
1474
1475         be_stat_ev_ull("co_after_costs", after.costs);
1476         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1477
1478         co_free_graph_structure(co);
1479         co_free_ou_structure(co);
1480         free_copy_opt(co);
1481 }