4760342289ba300369ce2f4edb5777f118d12ea2
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10
11 #ifdef HAVE_ALLOCA_H
12 #include <alloca.h>
13 #endif
14 #ifdef HAVE_MALLOC_H
15 #include <malloc.h>
16 #endif
17
18 #include "execfreq.h"
19 #include "xmalloc.h"
20 #include "debug.h"
21 #include "pmap.h"
22 #include "irgraph.h"
23 #include "irgwalk.h"
24 #include "irprog.h"
25 #include "irloop_t.h"
26 #include "iredges_t.h"
27 #include "phiclass.h"
28 #include "irbitset.h"
29 #include "irphase_t.h"
30 #include "irprintf_t.h"
31
32
33 #include "bearch.h"
34 #include "benode_t.h"
35 #include "beutil.h"
36 #include "beifg_t.h"
37 #include "becopyopt_t.h"
38 #include "becopystat.h"
39 #include "belive_t.h"
40 #include "beinsn_t.h"
41 #include "besched_t.h"
42 #include "benodesets.h"
43 #include "bejavacoal.h"
44 #include "bestatevent.h"
45 #include "beirg_t.h"
46
47 #ifdef WITH_LIBCORE
48 #include <libcore/lc_timing.h>
49 #include <libcore/lc_opts.h>
50 #endif /* WITH_LIBCORE */
51
52 #define DUMP_BEFORE 1
53 #define DUMP_AFTER  2
54 #define DUMP_APPEL  4
55 #define DUMP_ALL    2 * DUMP_APPEL - 1
56
57 #define COST_FUNC_FREQ     1
58 #define COST_FUNC_LOOP     2
59 #define COST_FUNC_ALL_ONE  3
60
61 static unsigned dump_flags    = 0;
62 static unsigned style_flags   = 0;
63 static unsigned  do_stats     = 0;
64 static cost_fct_t cost_func   = co_get_costs_exec_freq;
65 static unsigned algo          = CO_ALGO_HEUR2;
66 static int improve            = 1;
67
68 #ifdef WITH_LIBCORE
69 static const lc_opt_enum_mask_items_t dump_items[] = {
70         { "before",  DUMP_BEFORE },
71         { "after",   DUMP_AFTER  },
72         { "appel",   DUMP_APPEL  },
73         { "all",     DUMP_ALL    },
74         { NULL,      0 }
75 };
76
77 static const lc_opt_enum_mask_items_t style_items[] = {
78         { "color",   CO_IFG_DUMP_COLORS },
79         { "labels",  CO_IFG_DUMP_LABELS },
80         { "constr",  CO_IFG_DUMP_CONSTR },
81         { "shape",   CO_IFG_DUMP_SHAPE  },
82         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
83         { NULL,      0 }
84 };
85
86 static const lc_opt_enum_mask_items_t algo_items[] = {
87         { "none",   CO_ALGO_NONE  },
88         { "heur",   CO_ALGO_HEUR  },
89         { "heur2",  CO_ALGO_HEUR2 },
90 #ifdef WITH_JVM
91         { "heur3",  CO_ALGO_HEUR3 },
92 #endif /* WITH_JVM */
93 #ifdef WITH_ILP
94         { "ilp",    CO_ALGO_ILP   },
95 #endif /* WITH_ILP */
96         { NULL,     0 }
97 };
98
99 typedef int (*opt_funcptr)(void);
100
101 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
102         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
103         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
104         { "one",    (opt_funcptr) co_get_costs_all_one },
105         { NULL,     NULL }
106 };
107
108 static lc_opt_enum_mask_var_t dump_var = {
109         &dump_flags, dump_items
110 };
111
112 static lc_opt_enum_mask_var_t style_var = {
113         &style_flags, style_items
114 };
115
116 static lc_opt_enum_mask_var_t algo_var = {
117         &algo, algo_items
118 };
119
120 static lc_opt_enum_func_ptr_var_t cost_func_var = {
121         (opt_funcptr*) &cost_func, cost_func_items
122 };
123
124 static const lc_opt_table_entry_t options[] = {
125         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
126         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
127         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
128         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
129         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
130         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
131         { NULL }
132 };
133
134 /* Insert additional options registration functions here. */
135 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
136 extern void be_co2_register_options(lc_opt_entry_t *grp);
137 extern void be_co3_register_options(lc_opt_entry_t *grp);
138
139 void co_register_options(lc_opt_entry_t *grp)
140 {
141         lc_opt_entry_t *co_grp = lc_opt_get_grp(grp, "co");
142         lc_opt_add_table(co_grp, options);
143
144         be_co2_register_options(co_grp);
145         be_co3_register_options(co_grp);
146 #ifdef WITH_ILP
147         be_co_ilp_register_options(co_grp);
148 #endif
149 }
150 #endif
151
152
153 #undef QUICK_AND_DIRTY_HACK
154
155 /******************************************************************************
156     _____                           _
157    / ____|                         | |
158   | |  __  ___ _ __   ___ _ __ __ _| |
159   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
160   | |__| |  __/ | | |  __/ | | (_| | |
161    \_____|\___|_| |_|\___|_|  \__,_|_|
162
163  ******************************************************************************/
164
165 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
166
167
168 void be_copy_opt_init(void) {
169 }
170
171 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
172 {
173         const char *s1, *s2, *s3;
174         int len;
175         copy_opt_t *co;
176
177         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
178
179         co = xcalloc(1, sizeof(*co));
180         co->cenv      = chordal_env;
181         co->aenv      = chordal_env->birg->main_env->arch_env;
182         co->irg       = chordal_env->irg;
183         co->cls       = chordal_env->cls;
184         co->get_costs = get_costs;
185
186         s1 = get_irp_prog_name();
187         s2 = get_entity_name(get_irg_entity(co->irg));
188         s3 = chordal_env->cls->name;
189         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
190         co->name = xmalloc(len);
191         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
192
193         return co;
194 }
195
196 void free_copy_opt(copy_opt_t *co) {
197         xfree(co->name);
198         free(co);
199 }
200
201 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
202         arch_register_req_t req;
203         const arch_register_t *reg;
204
205         if (arch_irn_is(co->aenv, irn, ignore))
206                 return 0;
207
208         reg = arch_get_irn_register(co->aenv, irn);
209         if (arch_register_type_is(reg, ignore))
210                 return 0;
211
212         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req))
213                 return 1;
214
215         return 0;
216 }
217
218 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
219         const ir_edge_t *edge;
220         const arch_register_t *reg;
221
222         assert(0 && "Is buggy and obsolete. Do not use");
223
224         if (arch_irn_is(co->aenv, irn, ignore))
225                 return 0;
226
227         reg = arch_get_irn_register(co->aenv, irn);
228         if (arch_register_type_is(reg, ignore))
229                 return 0;
230
231         foreach_out_edge(irn, edge) {
232                 ir_node *n = edge->src;
233
234                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
235                         arch_register_req_t req;
236                         arch_get_register_req(co->aenv, &req, n, -1);
237
238                         if(is_Reg_Phi(n) ||
239                            is_Perm(co->aenv, n) ||
240                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
241                           )
242                                 return 1;
243                 }
244         }
245
246         return 0;
247 }
248
249 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
250         int cost = 0;
251         ir_loop *loop;
252         ir_node *root_block = get_nodes_block(root);
253
254         if (is_Phi(root)) {
255                 /* for phis the copies are placed in the corresponding pred-block */
256                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
257         } else {
258                 /* a perm places the copy in the same block as it resides */
259                 loop = get_irn_loop(root_block);
260         }
261         if (loop) {
262                 int d = get_loop_depth(loop);
263                 cost = d*d;
264         }
265         return 1+cost;
266 }
267
268 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
269         int res;
270         ir_node *root_bl = get_nodes_block(root);
271         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
272         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
273
274         /* don't allow values smaller than one. */
275         return res < 1 ? 1 : res;
276 }
277
278
279 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
280         return 1;
281 }
282
283 /******************************************************************************
284    ____        _   _    _       _ _          _____ _
285   / __ \      | | | |  | |     (_) |        / ____| |
286  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
287  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
288  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
289   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
290         | |                                                            __/ |
291         |_|                                                           |___/
292  ******************************************************************************/
293
294 /**
295  * Determines a maximum weighted independent set with respect to
296  * the interference and conflict edges of all nodes in a qnode.
297  */
298 static int ou_max_ind_set_costs(unit_t *ou) {
299         be_chordal_env_t *chordal_env = ou->co->cenv;
300         ir_node **safe, **unsafe;
301         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
302         bitset_t *curr;
303         int max, pos, curr_weight, best_weight = 0;
304
305         /* assign the nodes into two groups.
306          * safe: node has no interference, hence it is in every max stable set.
307          * unsafe: node has an interference
308          */
309         safe = alloca((ou->node_count-1) * sizeof(*safe));
310         safe_costs = 0;
311         safe_count = 0;
312         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
313         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
314         unsafe_count = 0;
315         for(i=1; i<ou->node_count; ++i) {
316                 int is_safe = 1;
317                 for(o=1; o<ou->node_count; ++o) {
318                         if (i==o)
319                                 continue;
320                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
321                                 unsafe_costs[unsafe_count] = ou->costs[i];
322                                 unsafe[unsafe_count] = ou->nodes[i];
323                                 ++unsafe_count;
324                                 is_safe = 0;
325                                 break;
326                         }
327                 }
328                 if (is_safe) {
329                         safe_costs += ou->costs[i];
330                         safe[safe_count++] = ou->nodes[i];
331                 }
332         }
333
334
335         /* now compute the best set out of the unsafe nodes*/
336         if (unsafe_count > MIS_HEUR_TRIGGER) {
337                 bitset_t *best = bitset_alloca(unsafe_count);
338                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
339                 for (i=0; i<unsafe_count; ++i) {
340                         bitset_set(best, i);
341                         /* check if it is a stable set */
342                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
343                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
344                                         bitset_clear(best, i); /* clear the bit and try next one */
345                                         break;
346                                 }
347                 }
348                 /* compute the weight */
349                 bitset_foreach(best, pos)
350                         best_weight += unsafe_costs[pos];
351         } else {
352                 /* Exact Algorithm: Brute force */
353                 curr = bitset_alloca(unsafe_count);
354                 bitset_set_all(curr);
355                 while ((max = bitset_popcnt(curr)) != 0) {
356                         /* check if curr is a stable set */
357                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
358                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
359                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
360                                                         goto no_stable_set;
361
362                         /* if we arrive here, we have a stable set */
363                         /* compute the weigth of the stable set*/
364                         curr_weight = 0;
365                         bitset_foreach(curr, pos)
366                                 curr_weight += unsafe_costs[pos];
367
368                         /* any better ? */
369                         if (curr_weight > best_weight) {
370                                 best_weight = curr_weight;
371                         }
372
373         no_stable_set:
374                         bitset_minus1(curr);
375                 }
376         }
377
378         return safe_costs+best_weight;
379 }
380
381 static void co_collect_units(ir_node *irn, void *env) {
382         copy_opt_t *co = env;
383         unit_t *unit;
384         arch_register_req_t req;
385
386         if (!is_curr_reg_class(co, irn))
387                 return;
388         if (!co_is_optimizable_root(co, irn))
389                 return;
390
391         /* Init a new unit */
392         unit = xcalloc(1, sizeof(*unit));
393         unit->co = co;
394         unit->node_count = 1;
395         INIT_LIST_HEAD(&unit->queue);
396
397         /* Phi with some/all of its arguments */
398         if (is_Reg_Phi(irn)) {
399                 int i, arity;
400
401                 /* init */
402                 arity = get_irn_arity(irn);
403                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
404                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
405                 unit->nodes[0] = irn;
406
407                 /* fill */
408                 for (i=0; i<arity; ++i) {
409                         int o, arg_pos;
410                         ir_node *arg = get_irn_n(irn, i);
411
412                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
413                         if (arg == irn)
414                                 continue;
415                         if (nodes_interfere(co->cenv, irn, arg)) {
416                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
417                                 continue;
418                         }
419
420                         /* Else insert the argument of the phi to the members of this ou */
421                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
422
423                         /* Check if arg has occurred at a prior position in the arg/list */
424                         arg_pos = 0;
425                         for (o=0; o<unit->node_count; ++o)
426                                 if (unit->nodes[o] == arg) {
427                                         arg_pos = o;
428                                         break;
429                                 }
430
431                         if (!arg_pos) { /* a new argument */
432                                 /* insert node, set costs */
433                                 unit->nodes[unit->node_count] = arg;
434                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
435                                 unit->node_count++;
436                         } else { /* arg has occured before in same phi */
437                                 /* increase costs for existing arg */
438                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
439                         }
440                 }
441                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
442                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
443         } else
444
445         /* Proj of a perm with corresponding arg */
446         if (is_Perm_Proj(co->aenv, irn)) {
447                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
448                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
449                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
450                 unit->node_count = 2;
451                 unit->nodes[0] = irn;
452                 unit->nodes[1] = get_Perm_src(irn);
453                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
454         } else
455
456         /* Src == Tgt of a 2-addr-code instruction */
457         if (is_2addr_code(co->aenv, irn, &req)) {
458                 ir_node *other = req.other_same;
459                 if (!nodes_interfere(co->cenv, irn, other)) {
460                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
461                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
462                         unit->node_count = 2;
463                         unit->nodes[0] = irn;
464                         unit->nodes[1] = other;
465                         unit->costs[1] = co->get_costs(co, irn, other, -1);
466                 }
467         } else
468                 assert(0 && "This is not an optimizable node!");
469
470         /* Insert the new unit at a position according to its costs */
471         if (unit->node_count > 1) {
472                 int i;
473                 struct list_head *tmp;
474
475                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
476                 for(i=1; i<unit->node_count; ++i) {
477                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
478                         unit->all_nodes_costs += unit->costs[i];
479                 }
480
481                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
482                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
483                 /* Insert the new ou according to its sort_key */
484                 tmp = &co->units;
485                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
486                         tmp = tmp->next;
487                 list_add(&unit->units, tmp);
488         } else {
489                 free(unit);
490         }
491 }
492
493 #ifdef QUICK_AND_DIRTY_HACK
494
495 static int compare_ous(const void *k1, const void *k2) {
496         const unit_t *u1 = *((const unit_t **) k1);
497         const unit_t *u2 = *((const unit_t **) k2);
498         int i, o, u1_has_constr, u2_has_constr;
499         arch_register_req_t req;
500         const arch_env_t *aenv = u1->co->aenv;
501
502         /* Units with constraints come first */
503         u1_has_constr = 0;
504         for (i=0; i<u1->node_count; ++i) {
505                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
506                 if (arch_register_req_is(&req, limited)) {
507                         u1_has_constr = 1;
508                         break;
509                 }
510         }
511
512         u2_has_constr = 0;
513         for (i=0; i<u2->node_count; ++i) {
514                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
515                 if (arch_register_req_is(&req, limited)) {
516                         u2_has_constr = 1;
517                         break;
518                 }
519         }
520
521         if (u1_has_constr != u2_has_constr)
522                 return u2_has_constr - u1_has_constr;
523
524         /* Now check, whether the two units are connected */
525 #if 0
526         for (i=0; i<u1->node_count; ++i)
527                 for (o=0; o<u2->node_count; ++o)
528                         if (u1->nodes[i] == u2->nodes[o])
529                                 return 0;
530 #endif
531
532         /* After all, the sort key decides. Greater keys come first. */
533         return u2->sort_key - u1->sort_key;
534
535 }
536
537 /**
538  * Sort the ou's according to constraints and their sort_key
539  */
540 static void co_sort_units(copy_opt_t *co) {
541         int i, count = 0, costs;
542         unit_t *ou, **ous;
543
544         /* get the number of ous, remove them form the list and fill the array */
545         list_for_each_entry(unit_t, ou, &co->units, units)
546                 count++;
547         ous = alloca(count * sizeof(*ous));
548
549         costs = co_get_max_copy_costs(co);
550
551         i = 0;
552         list_for_each_entry(unit_t, ou, &co->units, units)
553                 ous[i++] = ou;
554
555         INIT_LIST_HEAD(&co->units);
556
557         assert(count == i && list_empty(&co->units));
558
559         for (i=0; i<count; ++i)
560                 ir_printf("%+F\n", ous[i]->nodes[0]);
561
562         qsort(ous, count, sizeof(*ous), compare_ous);
563
564         ir_printf("\n\n");
565         for (i=0; i<count; ++i)
566                 ir_printf("%+F\n", ous[i]->nodes[0]);
567
568         /* reinsert into list in correct order */
569         for (i=0; i<count; ++i)
570                 list_add_tail(&ous[i]->units, &co->units);
571
572         assert(costs == co_get_max_copy_costs(co));
573 }
574 #endif
575
576 void co_build_ou_structure(copy_opt_t *co) {
577         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
578         INIT_LIST_HEAD(&co->units);
579         irg_walk_graph(co->irg, co_collect_units, NULL, co);
580 #ifdef QUICK_AND_DIRTY_HACK
581         co_sort_units(co);
582 #endif
583 }
584
585 void co_free_ou_structure(copy_opt_t *co) {
586         unit_t *curr, *tmp;
587         ASSERT_OU_AVAIL(co);
588         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
589                 xfree(curr->nodes);
590                 xfree(curr->costs);
591                 xfree(curr);
592         }
593         co->units.next = NULL;
594 }
595
596 /* co_solve_heuristic() is implemented in becopyheur.c */
597
598 int co_get_max_copy_costs(const copy_opt_t *co) {
599         int i, res = 0;
600         unit_t *curr;
601
602         ASSERT_OU_AVAIL(co);
603
604         list_for_each_entry(unit_t, curr, &co->units, units) {
605                 res += curr->inevitable_costs;
606                 for (i=1; i<curr->node_count; ++i)
607                         res += curr->costs[i];
608         }
609         return res;
610 }
611
612 int co_get_inevit_copy_costs(const copy_opt_t *co) {
613         int res = 0;
614         unit_t *curr;
615
616         ASSERT_OU_AVAIL(co);
617
618         list_for_each_entry(unit_t, curr, &co->units, units)
619                 res += curr->inevitable_costs;
620         return res;
621 }
622
623 int co_get_copy_costs(const copy_opt_t *co) {
624         int i, res = 0;
625         unit_t *curr;
626
627         ASSERT_OU_AVAIL(co);
628
629         list_for_each_entry(unit_t, curr, &co->units, units) {
630                 int root_col = get_irn_col(co, curr->nodes[0]);
631                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
632                 res += curr->inevitable_costs;
633                 for (i=1; i<curr->node_count; ++i) {
634                         int arg_col = get_irn_col(co, curr->nodes[i]);
635                         if (root_col != arg_col) {
636                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
637                                 res += curr->costs[i];
638                         }
639                 }
640         }
641         return res;
642 }
643
644 int co_get_lower_bound(const copy_opt_t *co) {
645         int res = 0;
646         unit_t *curr;
647
648         ASSERT_OU_AVAIL(co);
649
650         list_for_each_entry(unit_t, curr, &co->units, units)
651                 res += curr->inevitable_costs + curr->min_nodes_costs;
652         return res;
653 }
654
655 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
656 {
657         bitset_t *seen = bitset_irg_malloc(co->irg);
658         affinity_node_t *an;
659
660         memset(stat, 0, sizeof(stat[0]));
661
662         /* count affinity edges. */
663         co_gs_foreach_aff_node(co, an) {
664                 neighb_t *neigh;
665                 stat->aff_nodes += 1;
666                 bitset_add_irn(seen, an->irn);
667                 co_gs_foreach_neighb(an, neigh) {
668                         if(!bitset_contains_irn(seen, neigh->irn)) {
669                                 stat->aff_edges += 1;
670                                 stat->max_costs += neigh->costs;
671
672                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
673                                         stat->costs += neigh->costs;
674                                         stat->unsatisfied_edges += 1;
675                                 }
676
677                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
678                                         stat->aff_int += 1;
679                                         stat->inevit_costs += neigh->costs;
680                                 }
681
682                         }
683                 }
684         }
685
686         bitset_free(seen);
687 }
688
689 /******************************************************************************
690    _____                 _        _____ _
691   / ____|               | |      / ____| |
692  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
693  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
694  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
695   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
696                   | |                                       __/ |
697                   |_|                                      |___/
698  ******************************************************************************/
699
700 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
701         const affinity_node_t *n1 = k1;
702         const affinity_node_t *n2 = k2;
703
704         return (n1->irn != n2->irn);
705 }
706
707 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
708         affinity_node_t new_node, *node;
709         neighb_t new_nbr, *nbr;
710         int allocnew;
711
712         new_node.irn        = n1;
713         new_node.degree     = 0;
714         new_node.neighbours = NULL;
715         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
716
717         allocnew = 1;
718         for (nbr = node->neighbours; nbr; nbr = nbr->next)
719                 if (nbr->irn == n2) {
720                         allocnew = 0;
721                         break;
722                 }
723
724         /* if we did not find n2 in n1's neighbourhood insert it */
725         if (allocnew) {
726                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
727                 nbr = obstack_finish(&co->obst);
728                 nbr->irn   = n2;
729                 nbr->costs = 0;
730                 nbr->next  = node->neighbours;
731                 node->neighbours = nbr;
732                 node->degree++;
733         }
734
735         /* now nbr points to n1's neighbour-entry of n2 */
736         nbr->costs += costs;
737 }
738
739 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
740         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
741                 add_edge(co, n1, n2, costs);
742                 add_edge(co, n2, n1, costs);
743         }
744 }
745
746 static void build_graph_walker(ir_node *irn, void *env) {
747         copy_opt_t *co = env;
748         int pos, max;
749         arch_register_req_t req;
750         const arch_register_t *reg;
751
752         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
753                 return;
754
755         reg = arch_get_irn_register(co->aenv, irn);
756         if (arch_register_type_is(reg, ignore))
757                 return;
758
759         /* Phis */
760         if (is_Reg_Phi(irn))
761                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
762                         ir_node *arg = get_irn_n(irn, pos);
763                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
764                 }
765
766         /* Perms */
767         else if (is_Perm_Proj(co->aenv, irn)) {
768                 ir_node *arg = get_Perm_src(irn);
769                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
770         }
771
772         /* 2-address code */
773         else if (is_2addr_code(co->aenv, irn, &req))
774                 add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0));
775 }
776
777 void co_build_graph_structure(copy_opt_t *co) {
778         obstack_init(&co->obst);
779         co->nodes = new_set(compare_affinity_node_t, 32);
780
781         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
782 }
783
784 void co_free_graph_structure(copy_opt_t *co) {
785         ASSERT_GS_AVAIL(co);
786
787         del_set(co->nodes);
788         obstack_free(&co->obst, NULL);
789         co->nodes = NULL;
790 }
791
792 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
793
794 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
795         affinity_node_t new_node, *n;
796
797         ASSERT_GS_AVAIL(co);
798
799         new_node.irn = irn;
800         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
801         if (n) {
802                 return (n->degree > 0);
803         } else
804                 return 0;
805 }
806
807 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
808 {
809         be_ifg_t *ifg  = co->cenv->ifg;
810         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
811         bitset_t *adm  = bitset_alloca(co->cls->n_regs);
812
813         ir_node *irn;
814         void *it, *nit;
815         int i, n, n_regs;
816
817         n_regs = 0;
818         for(i = 0; i < co->cls->n_regs; ++i) {
819                 const arch_register_t *reg = &co->cls->regs[i];
820                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
821         }
822
823         /*
824          * n contains the first node number.
825          * the values below n are the pre-colored register nodes
826          */
827
828         it  = be_ifg_nodes_iter_alloca(ifg);
829         nit = be_ifg_neighbours_iter_alloca(ifg);
830
831         n = n_regs;
832         be_ifg_foreach_node(ifg, it, irn) {
833                 if(!arch_irn_is(co->aenv, irn, ignore))
834                         set_irn_link(irn, INT_TO_PTR(n++));
835         }
836
837         fprintf(f, "%d %d\n", n, n_regs);
838
839         be_ifg_foreach_node(ifg, it, irn) {
840                 if(!arch_irn_is(co->aenv, irn, ignore)) {
841                         int idx            = PTR_TO_INT(get_irn_link(irn));
842                         affinity_node_t *a = get_affinity_info(co, irn);
843
844                         arch_register_req_t req;
845                         ir_node *adj;
846
847                         arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0));
848                         if(arch_register_req_is(&req, limited)) {
849                                 bitset_clear_all(adm);
850                                 req.limited(req.limited_env, adm);
851                                 for(i = 0; i < co->cls->n_regs; ++i)
852                                         if(!bitset_is_set(adm, i) && color_map[i] >= 0)
853                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
854
855                         }
856
857
858                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
859                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
860                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
861                                         if(idx < adj_idx)
862                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
863                                 }
864                         }
865
866                         if(a) {
867                                 neighb_t *n;
868
869                                 co_gs_foreach_neighb(a, n) {
870                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
871                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
872                                                 if(idx < n_idx)
873                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
874                                         }
875                                 }
876                         }
877                 }
878         }
879 }
880
881 typedef struct _appel_clique_walker_t {
882         phase_t ph;
883         const copy_opt_t *co;
884         int curr_nr;
885         int node_count;
886         FILE *f;
887         int dumb;
888         int *color_map;
889         struct obstack obst;
890 } appel_clique_walker_t;
891
892 typedef struct _appel_block_info_t {
893         int *live_end_nr;
894         int *live_in_nr;
895         int *phi_nr;
896         ir_node **live_end;
897         ir_node **live_in;
898         ir_node **phi;
899         int n_live_end;
900         int n_live_in;
901         int n_phi;
902 } appel_block_info_t;
903
904 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
905 {
906 #if 0
907         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
908         int res = (int) freq;
909         return res == 0 ? 1 : res;
910 #else
911         ir_loop *loop = get_irn_loop(bl);
912         if(loop) {
913                 int d = get_loop_depth(loop);
914                 return 1 + d * d;
915         }
916         return 1;
917 #endif
918 }
919
920 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
921 {
922         appel_block_info_t *res = NULL;
923
924         if(is_Block(irn)) {
925                 appel_clique_walker_t *d = (void *) phase;
926                 res = phase_alloc(phase, sizeof(res[0]));
927                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
928                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
929                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
930                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
931                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
932                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
933         }
934
935         return res;
936 }
937
938 typedef struct _insn_list_t {
939         be_insn_t *insn;
940         struct list_head list;
941 } insn_list_t;
942
943 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
944 {
945         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
946         int i;
947
948         for(i = 0; i < bli->n_live_end; ++i)
949                 if(bli->live_end[i] == irn)
950                         return bli->live_end_nr[i];
951
952         return -1;
953 }
954
955 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
956 {
957         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
958         ir_node *irn;
959         int n_live;
960         int j;
961
962         n_live = 0;
963         foreach_pset(live, irn)
964                 live_arr[n_live++] = irn;
965
966         /* dump the live after clique */
967         if(!env->dumb) {
968                 for(j = 0; j < n_live; ++j) {
969                         int k;
970
971                         for(k = j + 1; k < n_live; ++k) {
972                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
973                         }
974                         fprintf(env->f, "\n");
975                 }
976         }
977
978         /* dump the affinities */
979         for(j = 0; !env->dumb && j < n_live; ++j) {
980                 ir_node *irn = live_arr[j];
981                 int old_nr = PTR_TO_INT(get_irn_link(irn));
982
983                 /* if the node was already live in the last insn dump the affinity */
984                 if(old_nr > start_nr) {
985                         int weight = appel_aff_weight(env, bl);
986                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
987                 }
988         }
989
990         /* set the current numbers into the link field. */
991         for(j = 0; j < n_live; ++j) {
992                 ir_node *irn = live_arr[j];
993                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
994         }
995
996         return curr_nr + n_live;
997 }
998
999 static void appel_walker(ir_node *bl, void *data)
1000 {
1001         appel_clique_walker_t *env = data;
1002         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1003         struct obstack *obst       = &env->obst;
1004         void *base                 = obstack_base(obst);
1005         pset *live                 = pset_new_ptr_default();
1006         be_lv_t *lv                = env->co->cenv->birg->lv;
1007
1008         int n_insns  = 0;
1009         int n_nodes  = 0;
1010         int start_nr = env->curr_nr;
1011         int curr_nr  = start_nr;
1012
1013         be_insn_env_t insn_env;
1014         int i, j;
1015         ir_node *irn;
1016         be_insn_t **insns;
1017
1018         insn_env.aenv = env->co->aenv;
1019         insn_env.cls  = env->co->cls;
1020         insn_env.obst = obst;
1021         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1022
1023         /* Guess how many insns will be in this block. */
1024         sched_foreach(bl, irn)
1025                 n_nodes++;
1026
1027         bli->n_phi = 0;
1028         insns = malloc(n_nodes * sizeof(insns[0]));
1029
1030         /* Put all insns in an array. */
1031         irn = sched_first(bl);
1032         while(!sched_is_end(irn)) {
1033                 be_insn_t *insn;
1034                 insn = be_scan_insn(&insn_env, irn);
1035                 insns[n_insns++] = insn;
1036                 irn = insn->next_insn;
1037         }
1038
1039         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
1040         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1041
1042         /* Generate the bad and ugly. */
1043         for(i = n_insns - 1; i >= 0; --i) {
1044                 be_insn_t *insn = insns[i];
1045
1046                 /* The first live set has to be saved in the block border set. */
1047                 if(i == n_insns - 1) {
1048                         j = 0;
1049                         foreach_pset(live, irn) {
1050                                 bli->live_end[j]    = irn;
1051                                 bli->live_end_nr[j] = curr_nr + j;
1052                                 ++j;
1053                         }
1054                         bli->n_live_end = j;
1055                 }
1056
1057                 if(!env->dumb) {
1058                         for(j = 0; j < insn->use_start; ++j) {
1059                                 ir_node *op   = insn->ops[j].carrier;
1060                                 bitset_t *adm = insn->ops[j].regs;
1061                                 int k;
1062                                 int nr;
1063
1064                                 if(!insn->ops[j].has_constraints)
1065                                         continue;
1066
1067                                 nr = 0;
1068                                 foreach_pset(live, irn) {
1069                                         if(irn == op) {
1070                                                 pset_break(live);
1071                                                 break;
1072                                         }
1073                                         ++nr;
1074                                 }
1075
1076                                 assert(nr < pset_count(live));
1077
1078                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1079                                         int mapped_col = env->color_map[k];
1080                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1081                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1082                                 }
1083                         }
1084                 }
1085
1086                 /* dump the clique and update the stuff. */
1087                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1088
1089                 /* remove all defs. */
1090                 for(j = 0; j < insn->use_start; ++j)
1091                         pset_remove_ptr(live, insn->ops[j].carrier);
1092
1093                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1094                         bli->phi[bli->n_phi]    = insn->irn;
1095                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1096                         bli->n_phi++;
1097                 }
1098
1099                 /* add all uses */
1100                 else
1101                         for(j = insn->use_start; j < insn->n_ops; ++j)
1102                                 pset_insert_ptr(live, insn->ops[j].carrier);
1103         }
1104
1105         /* print the start clique. */
1106         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1107
1108         i = 0;
1109         foreach_pset(live, irn) {
1110                 bli->live_in[i]    = irn;
1111                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1112                 ++i;
1113         }
1114         bli->n_live_in = i;
1115
1116         del_pset(live);
1117         free(insns);
1118         obstack_free(obst, base);
1119         env->curr_nr = curr_nr;
1120 }
1121
1122 static void appel_inter_block_aff(ir_node *bl, void *data)
1123 {
1124         appel_clique_walker_t *env = data;
1125         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1126
1127         int i, j, n;
1128
1129         for(i = 0; i < bli->n_live_in; ++i) {
1130                 ir_node *irn = bli->live_in[i];
1131
1132                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1133                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1134
1135                         int nr = appel_get_live_end_nr(env, pred, irn);
1136                         assert(nr >= 0);
1137                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1138                 }
1139         }
1140
1141         for(i = 0; i < bli->n_phi; ++i) {
1142                 ir_node *irn = bli->phi[i];
1143
1144                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1145                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1146                         ir_node *op = get_irn_n(irn, j);
1147
1148                         int nr = appel_get_live_end_nr(env, pred, op);
1149                         assert(nr >= 0);
1150                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1151                 }
1152         }
1153
1154 }
1155
1156 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1157 {
1158         int i;
1159         int n_colors;
1160         appel_clique_walker_t env;
1161         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1162         be_lv_t *lv = co->cenv->birg->lv;
1163
1164         be_liveness_recompute(lv);
1165         obstack_init(&env.obst);
1166         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1167         env.curr_nr = co->cls->n_regs;
1168         env.co = co;
1169         env.f = f;
1170
1171         bitset_copy(adm, co->cenv->ignore_colors);
1172         bitset_flip_all(adm);
1173
1174         /* Make color map. */
1175         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1176         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1177                 const arch_register_t *reg = &co->cls->regs[i];
1178                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1179         }
1180
1181         env.dumb = 1;
1182         env.curr_nr = n_colors;
1183         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1184         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1185
1186         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1187
1188         /* make the first k nodes interfere */
1189         for(i = 0; i < n_colors; ++i) {
1190                 int j;
1191                 for(j = i + 1; j < n_colors; ++j)
1192                         fprintf(f, "%d %d -1 ", i, j);
1193                 fprintf(f, "\n");
1194         }
1195
1196         env.dumb = 0;
1197         env.curr_nr = n_colors;
1198         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1199         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1200         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1201         obstack_free(&env.obst, NULL);
1202 }
1203
1204 /*
1205          ___ _____ ____   ____   ___ _____   ____                        _
1206         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1207          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1208          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1209         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1210                                                                   |_|            |___/
1211 */
1212
1213 static const char *get_dot_color_name(int col)
1214 {
1215         static const char *names[] = {
1216                 "blue",
1217                 "red",
1218                 "green",
1219                 "yellow",
1220                 "cyan",
1221                 "magenta",
1222                 "orange",
1223                 "chocolate",
1224                 "beige",
1225                 "navy",
1226                 "darkgreen",
1227                 "darkred",
1228                 "lightPink",
1229                 "chartreuse",
1230                 "lightskyblue",
1231                 "linen",
1232                 "pink",
1233                 "lightslateblue",
1234                 "mintcream",
1235                 "red",
1236                 "darkolivegreen",
1237                 "mediumblue",
1238                 "mistyrose",
1239                 "salmon",
1240                 "darkseagreen",
1241                 "mediumslateblue"
1242                 "moccasin",
1243                 "tomato",
1244                 "forestgreen",
1245                 "darkturquoise",
1246                 "palevioletred"
1247         };
1248
1249         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1250 }
1251
1252 typedef struct _co_ifg_dump_t {
1253         const copy_opt_t *co;
1254         unsigned flags;
1255 } co_ifg_dump_t;
1256
1257 static void ifg_dump_graph_attr(FILE *f, void *self)
1258 {
1259         fprintf(f, "overlap=scale");
1260 }
1261
1262 static int ifg_is_dump_node(void *self, ir_node *irn)
1263 {
1264         co_ifg_dump_t *cod = self;
1265         return !arch_irn_is(cod->co->aenv, irn, ignore);
1266 }
1267
1268 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1269 {
1270         co_ifg_dump_t *env         = self;
1271         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1272         arch_register_req_t req;
1273         int limited;
1274
1275         arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0));
1276         limited = arch_register_req_is(&req, limited);
1277
1278         if(env->flags & CO_IFG_DUMP_LABELS) {
1279                 ir_fprintf(f, "label=\"%+F", irn);
1280
1281                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1282                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1283                         req.limited(req.limited_env, bs);
1284                         ir_fprintf(f, "\\n%B", bs);
1285                 }
1286                 ir_fprintf(f, "\" ");
1287         }
1288
1289         else
1290                 fprintf(f, "label=\"\" shape=point " );
1291
1292         if(env->flags & CO_IFG_DUMP_SHAPE)
1293                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1294
1295         if(env->flags & CO_IFG_DUMP_COLORS)
1296                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1297 }
1298
1299 static void ifg_dump_at_end(FILE *file, void *self)
1300 {
1301         co_ifg_dump_t *env = self;
1302         affinity_node_t *a;
1303
1304         co_gs_foreach_aff_node(env->co, a) {
1305                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1306                 unsigned aidx = get_irn_idx(a->irn);
1307                 neighb_t *n;
1308
1309                 co_gs_foreach_neighb(a, n) {
1310                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1311                         unsigned nidx = get_irn_idx(n->irn);
1312
1313                         if(aidx < nidx) {
1314                                 const char *color = nr == ar ? "blue" : "red";
1315                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1316                                 if(env->flags & CO_IFG_DUMP_LABELS)
1317                                         fprintf(file, "label=\"%d\" ", n->costs);
1318                                 if(env->flags & CO_IFG_DUMP_COLORS)
1319                                         fprintf(file, "color=%s ", color);
1320                                 else
1321                                         fprintf(file, "style=dotted");
1322                                 fprintf(file, "];\n");
1323                         }
1324                 }
1325         }
1326 }
1327
1328
1329 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1330         ifg_is_dump_node,
1331         ifg_dump_graph_attr,
1332         ifg_dump_node_attr,
1333         NULL,
1334         NULL,
1335         ifg_dump_at_end
1336 };
1337
1338
1339
1340 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1341 {
1342         co_ifg_dump_t cod;
1343
1344         cod.co    = co;
1345         cod.flags = flags;
1346         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1347 }
1348
1349
1350 void co_solve_park_moon(copy_opt_t *opt)
1351 {
1352
1353 }
1354
1355 static int void_algo(copy_opt_t *co)
1356 {
1357         return 0;
1358 }
1359
1360 /*
1361                 _    _                  _ _   _
1362            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1363           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1364          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1365         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1366                            |___/
1367 */
1368
1369 typedef struct {
1370         co_algo_t *algo;
1371         const char *name;
1372         int can_improve_existing;
1373 } co_algo_info_t;
1374
1375 static co_algo_info_t algos[] = {
1376         { void_algo,                "none",  0 },
1377         { co_solve_heuristic,       "heur1", 0 },
1378         { co_solve_heuristic_new,   "heur2", 0 },
1379         { co_solve_heuristic_java,  "heur3", 0 },
1380 #ifdef WITH_ILP
1381         { co_solve_ilp2,            "ilp",   1 },
1382 #endif
1383         { NULL,                     "",      0 }
1384 };
1385
1386 /*
1387     __  __       _         ____       _
1388    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1389    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1390    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1391    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1392
1393 */
1394
1395 void co_driver(be_chordal_env_t *cenv)
1396 {
1397 #ifdef WITH_LIBCORE
1398         lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
1399 #endif
1400         co_complete_stats_t before, after;
1401         copy_opt_t *co;
1402         co_algo_t  *algo_func;
1403         int was_optimal = 0;
1404
1405         if(algo < 0 || algo >= CO_ALGO_LAST)
1406                 return;
1407
1408         co = new_copy_opt(cenv, cost_func);
1409         co_build_ou_structure(co);
1410         co_build_graph_structure(co);
1411
1412         co_complete_stats(co, &before);
1413
1414         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1415         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1416         be_stat_ev_ull("co_max_costs",    before.max_costs);
1417         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1418         be_stat_ev_ull("co_aff_int",      before.aff_int);
1419
1420         be_stat_ev_ull("co_init_costs",   before.costs);
1421         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1422
1423         /* Dump the interference graph in Appel's format. */
1424         if(dump_flags & DUMP_APPEL) {
1425                 FILE *f = be_chordal_open(cenv, "", ".apl");
1426                 co_dump_appel_graph(co, f);
1427                 fclose(f);
1428         }
1429
1430         if(dump_flags & DUMP_BEFORE) {
1431                 FILE *f = be_chordal_open(cenv, "", "-before.dot");
1432                 co_dump_ifg_dot(co, f, style_flags);
1433                 fclose(f);
1434         }
1435
1436         /* if the algo can improve results, provide an initial solution with heur3 */
1437         if(improve && algos[algo].can_improve_existing) {
1438                 co_complete_stats_t stats;
1439
1440                 /* produce a heuristical solution */
1441                 co_solve_heuristic_java(co);
1442
1443                 /* do the stats and provide the current costs */
1444                 co_complete_stats(co, &stats);
1445                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1446         }
1447
1448 #ifdef WITH_JVM
1449         /* start the JVM here so that it does not tamper the timing. */
1450         if(algo == CO_ALGO_HEUR3)
1451                 be_java_coal_start_jvm();
1452 #endif
1453
1454         algo_func = algos[algo].algo;
1455
1456 #ifdef WITH_LIBCORE
1457         lc_timer_reset_and_start(timer);
1458 #endif
1459
1460         was_optimal = algo_func(co);
1461
1462 #ifdef WITH_LIBCORE
1463         lc_timer_stop(timer);
1464         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1465 #endif
1466
1467         be_stat_ev_ull("co_optimal", was_optimal);
1468
1469         if(dump_flags & DUMP_AFTER) {
1470                 FILE *f = be_chordal_open(cenv, "", "-after.dot");
1471                 co_dump_ifg_dot(co, f, style_flags);
1472                 fclose(f);
1473         }
1474
1475         co_complete_stats(co, &after);
1476
1477         if(do_stats) {
1478                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1479                 ulong64 evitable          = after.costs     - after.inevit_costs;
1480
1481                 ir_printf("%30F ", cenv->irg);
1482                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1483
1484                 if(optimizable_costs > 0)
1485                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1486                 else
1487                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1488         }
1489
1490         be_stat_ev_ull("co_after_costs", after.costs);
1491         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1492
1493         co_free_graph_structure(co);
1494         co_free_ou_structure(co);
1495         free_copy_opt(co);
1496 }