0001f7f8c6dba3cfadf2f6132b269a2cd7046c49
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10
11 #ifdef HAVE_ALLOCA_H
12 #include <alloca.h>
13 #endif
14 #ifdef HAVE_MALLOC_H
15 #include <malloc.h>
16 #endif
17
18 #include "execfreq.h"
19 #include "xmalloc.h"
20 #include "debug.h"
21 #include "pmap.h"
22 #include "irgraph.h"
23 #include "irgwalk.h"
24 #include "irprog.h"
25 #include "irloop_t.h"
26 #include "iredges_t.h"
27 #include "phiclass.h"
28 #include "irbitset.h"
29 #include "irphase_t.h"
30 #include "irprintf_t.h"
31
32 #include "bemodule.h"
33 #include "bearch.h"
34 #include "benode_t.h"
35 #include "beutil.h"
36 #include "beifg_t.h"
37 #include "becopyopt_t.h"
38 #include "becopystat.h"
39 #include "belive_t.h"
40 #include "beinsn_t.h"
41 #include "besched_t.h"
42 #include "benodesets.h"
43 #include "bejavacoal.h"
44 #include "bestatevent.h"
45 #include "beirg_t.h"
46 #include "error.h"
47
48 #include <libcore/lc_timing.h>
49 #include <libcore/lc_opts.h>
50 #include <libcore/lc_opts_enum.h>
51
52 #define DUMP_BEFORE 1
53 #define DUMP_AFTER  2
54 #define DUMP_APPEL  4
55 #define DUMP_ALL    2 * DUMP_APPEL - 1
56
57 #define COST_FUNC_FREQ     1
58 #define COST_FUNC_LOOP     2
59 #define COST_FUNC_ALL_ONE  3
60
61 static unsigned dump_flags    = 0;
62 static unsigned style_flags   = 0;
63 static unsigned  do_stats     = 0;
64 static cost_fct_t cost_func   = co_get_costs_exec_freq;
65 static unsigned algo          = CO_ALGO_HEUR2;
66 static int improve            = 1;
67
68 static const lc_opt_enum_mask_items_t dump_items[] = {
69         { "before",  DUMP_BEFORE },
70         { "after",   DUMP_AFTER  },
71         { "appel",   DUMP_APPEL  },
72         { "all",     DUMP_ALL    },
73         { NULL,      0 }
74 };
75
76 static const lc_opt_enum_mask_items_t style_items[] = {
77         { "color",   CO_IFG_DUMP_COLORS },
78         { "labels",  CO_IFG_DUMP_LABELS },
79         { "constr",  CO_IFG_DUMP_CONSTR },
80         { "shape",   CO_IFG_DUMP_SHAPE  },
81         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
82         { NULL,      0 }
83 };
84
85 static const lc_opt_enum_mask_items_t algo_items[] = {
86         { "none",   CO_ALGO_NONE  },
87         { "heur",   CO_ALGO_HEUR  },
88         { "heur2",  CO_ALGO_HEUR2 },
89 #ifdef WITH_JVM
90         { "heur3",  CO_ALGO_HEUR3 },
91 #endif /* WITH_JVM */
92 #ifdef WITH_ILP
93         { "ilp",    CO_ALGO_ILP   },
94 #endif /* WITH_ILP */
95         { NULL,     0 }
96 };
97
98 typedef int (*opt_funcptr)(void);
99
100 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
101         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
102         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
103         { "one",    (opt_funcptr) co_get_costs_all_one },
104         { NULL,     NULL }
105 };
106
107 static lc_opt_enum_mask_var_t dump_var = {
108         &dump_flags, dump_items
109 };
110
111 static lc_opt_enum_mask_var_t style_var = {
112         &style_flags, style_items
113 };
114
115 static lc_opt_enum_mask_var_t algo_var = {
116         &algo, algo_items
117 };
118
119 static lc_opt_enum_func_ptr_var_t cost_func_var = {
120         (opt_funcptr*) &cost_func, cost_func_items
121 };
122
123 static const lc_opt_table_entry_t options[] = {
124         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
125         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
126         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
127         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
128         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
129         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
130         { NULL }
131 };
132
133 /* Insert additional options registration functions here. */
134 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
135 extern void be_co2_register_options(lc_opt_entry_t *grp);
136 extern void be_co3_register_options(lc_opt_entry_t *grp);
137
138 void be_init_copycoal(void)
139 {
140         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
141         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
142         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
143         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
144
145         lc_opt_add_table(co_grp, options);
146 }
147
148 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
149
150 #undef QUICK_AND_DIRTY_HACK
151
152 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
153 {
154         if(env->ifg)
155                 return be_ifg_connected(env->ifg, a, b);
156         else
157                 return values_interfere(env->birg->lv, a, b);
158 }
159
160
161 /******************************************************************************
162     _____                           _
163    / ____|                         | |
164   | |  __  ___ _ __   ___ _ __ __ _| |
165   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
166   | |__| |  __/ | | |  __/ | | (_| | |
167    \_____|\___|_| |_|\___|_|  \__,_|_|
168
169  ******************************************************************************/
170
171 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
172
173
174 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
175 {
176         const char *s1, *s2, *s3;
177         int len;
178         copy_opt_t *co;
179
180         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
181
182         co = xcalloc(1, sizeof(*co));
183         co->cenv      = chordal_env;
184         co->aenv      = chordal_env->birg->main_env->arch_env;
185         co->irg       = chordal_env->irg;
186         co->cls       = chordal_env->cls;
187         co->get_costs = get_costs;
188
189         s1 = get_irp_prog_name();
190         s2 = get_entity_name(get_irg_entity(co->irg));
191         s3 = chordal_env->cls->name;
192         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
193         co->name = xmalloc(len);
194         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
195
196         return co;
197 }
198
199 void free_copy_opt(copy_opt_t *co) {
200         xfree(co->name);
201         free(co);
202 }
203
204 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
205         arch_register_req_t req;
206         const arch_register_t *reg;
207
208         if (arch_irn_is(co->aenv, irn, ignore))
209                 return 0;
210
211         reg = arch_get_irn_register(co->aenv, irn);
212         if (arch_register_type_is(reg, ignore))
213                 return 0;
214
215         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req))
216                 return 1;
217
218         return 0;
219 }
220
221 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
222         const ir_edge_t *edge;
223         const arch_register_t *reg;
224
225         assert(0 && "Is buggy and obsolete. Do not use");
226
227         if (arch_irn_is(co->aenv, irn, ignore))
228                 return 0;
229
230         reg = arch_get_irn_register(co->aenv, irn);
231         if (arch_register_type_is(reg, ignore))
232                 return 0;
233
234         foreach_out_edge(irn, edge) {
235                 ir_node *n = edge->src;
236
237                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
238                         arch_register_req_t req;
239                         arch_get_register_req(co->aenv, &req, n, -1);
240
241                         if(is_Reg_Phi(n) ||
242                            is_Perm(co->aenv, n) ||
243                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
244                           )
245                                 return 1;
246                 }
247         }
248
249         return 0;
250 }
251
252 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
253         int cost = 0;
254         ir_loop *loop;
255         ir_node *root_block = get_nodes_block(root);
256
257         if (is_Phi(root)) {
258                 /* for phis the copies are placed in the corresponding pred-block */
259                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
260         } else {
261                 /* a perm places the copy in the same block as it resides */
262                 loop = get_irn_loop(root_block);
263         }
264         if (loop) {
265                 int d = get_loop_depth(loop);
266                 cost = d*d;
267         }
268         return 1+cost;
269 }
270
271 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
272         int res;
273         ir_node *root_bl = get_nodes_block(root);
274         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
275         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
276
277         /* don't allow values smaller than one. */
278         return res < 1 ? 1 : res;
279 }
280
281
282 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
283         return 1;
284 }
285
286 /******************************************************************************
287    ____        _   _    _       _ _          _____ _
288   / __ \      | | | |  | |     (_) |        / ____| |
289  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
290  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
291  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
292   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
293         | |                                                            __/ |
294         |_|                                                           |___/
295  ******************************************************************************/
296
297 /**
298  * Determines a maximum weighted independent set with respect to
299  * the interference and conflict edges of all nodes in a qnode.
300  */
301 static int ou_max_ind_set_costs(unit_t *ou) {
302         be_chordal_env_t *chordal_env = ou->co->cenv;
303         ir_node **safe, **unsafe;
304         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
305         bitset_t *curr;
306         int max, pos, curr_weight, best_weight = 0;
307
308         /* assign the nodes into two groups.
309          * safe: node has no interference, hence it is in every max stable set.
310          * unsafe: node has an interference
311          */
312         safe = alloca((ou->node_count-1) * sizeof(*safe));
313         safe_costs = 0;
314         safe_count = 0;
315         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
316         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
317         unsafe_count = 0;
318         for(i=1; i<ou->node_count; ++i) {
319                 int is_safe = 1;
320                 for(o=1; o<ou->node_count; ++o) {
321                         if (i==o)
322                                 continue;
323                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
324                                 unsafe_costs[unsafe_count] = ou->costs[i];
325                                 unsafe[unsafe_count] = ou->nodes[i];
326                                 ++unsafe_count;
327                                 is_safe = 0;
328                                 break;
329                         }
330                 }
331                 if (is_safe) {
332                         safe_costs += ou->costs[i];
333                         safe[safe_count++] = ou->nodes[i];
334                 }
335         }
336
337
338         /* now compute the best set out of the unsafe nodes*/
339         if (unsafe_count > MIS_HEUR_TRIGGER) {
340                 bitset_t *best = bitset_alloca(unsafe_count);
341                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
342                 for (i=0; i<unsafe_count; ++i) {
343                         bitset_set(best, i);
344                         /* check if it is a stable set */
345                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
346                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
347                                         bitset_clear(best, i); /* clear the bit and try next one */
348                                         break;
349                                 }
350                 }
351                 /* compute the weight */
352                 bitset_foreach(best, pos)
353                         best_weight += unsafe_costs[pos];
354         } else {
355                 /* Exact Algorithm: Brute force */
356                 curr = bitset_alloca(unsafe_count);
357                 bitset_set_all(curr);
358                 while ((max = bitset_popcnt(curr)) != 0) {
359                         /* check if curr is a stable set */
360                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
361                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
362                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
363                                                         goto no_stable_set;
364
365                         /* if we arrive here, we have a stable set */
366                         /* compute the weigth of the stable set*/
367                         curr_weight = 0;
368                         bitset_foreach(curr, pos)
369                                 curr_weight += unsafe_costs[pos];
370
371                         /* any better ? */
372                         if (curr_weight > best_weight) {
373                                 best_weight = curr_weight;
374                         }
375
376         no_stable_set:
377                         bitset_minus1(curr);
378                 }
379         }
380
381         return safe_costs+best_weight;
382 }
383
384 static void co_collect_units(ir_node *irn, void *env) {
385         copy_opt_t *co = env;
386         unit_t *unit;
387         arch_register_req_t req;
388
389         if (!is_curr_reg_class(co, irn))
390                 return;
391         if (!co_is_optimizable_root(co, irn))
392                 return;
393
394         /* Init a new unit */
395         unit = xcalloc(1, sizeof(*unit));
396         unit->co = co;
397         unit->node_count = 1;
398         INIT_LIST_HEAD(&unit->queue);
399
400         /* Phi with some/all of its arguments */
401         if (is_Reg_Phi(irn)) {
402                 int i, arity;
403
404                 /* init */
405                 arity = get_irn_arity(irn);
406                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
407                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
408                 unit->nodes[0] = irn;
409
410                 /* fill */
411                 for (i=0; i<arity; ++i) {
412                         int o, arg_pos;
413                         ir_node *arg = get_irn_n(irn, i);
414
415                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
416                         if (arg == irn)
417                                 continue;
418                         if (nodes_interfere(co->cenv, irn, arg)) {
419                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
420                                 continue;
421                         }
422
423                         /* Else insert the argument of the phi to the members of this ou */
424                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
425
426                         /* Check if arg has occurred at a prior position in the arg/list */
427                         arg_pos = 0;
428                         for (o=0; o<unit->node_count; ++o)
429                                 if (unit->nodes[o] == arg) {
430                                         arg_pos = o;
431                                         break;
432                                 }
433
434                         if (!arg_pos) { /* a new argument */
435                                 /* insert node, set costs */
436                                 unit->nodes[unit->node_count] = arg;
437                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
438                                 unit->node_count++;
439                         } else { /* arg has occured before in same phi */
440                                 /* increase costs for existing arg */
441                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
442                         }
443                 }
444                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
445                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
446         } else
447
448         /* Proj of a perm with corresponding arg */
449         if (is_Perm_Proj(co->aenv, irn)) {
450                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
451                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
452                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
453                 unit->node_count = 2;
454                 unit->nodes[0] = irn;
455                 unit->nodes[1] = get_Perm_src(irn);
456                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
457         } else
458
459         /* Src == Tgt of a 2-addr-code instruction */
460         if (is_2addr_code(co->aenv, irn, &req)) {
461                 ir_node *other = req.other_same;
462                 if (!nodes_interfere(co->cenv, irn, other)) {
463                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
464                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
465                         unit->node_count = 2;
466                         unit->nodes[0] = irn;
467                         unit->nodes[1] = other;
468                         unit->costs[1] = co->get_costs(co, irn, other, -1);
469                 }
470         } else
471                 assert(0 && "This is not an optimizable node!");
472
473         /* Insert the new unit at a position according to its costs */
474         if (unit->node_count > 1) {
475                 int i;
476                 struct list_head *tmp;
477
478                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
479                 for(i=1; i<unit->node_count; ++i) {
480                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
481                         unit->all_nodes_costs += unit->costs[i];
482                 }
483
484                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
485                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
486                 /* Insert the new ou according to its sort_key */
487                 tmp = &co->units;
488                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
489                         tmp = tmp->next;
490                 list_add(&unit->units, tmp);
491         } else {
492                 free(unit);
493         }
494 }
495
496 #ifdef QUICK_AND_DIRTY_HACK
497
498 static int compare_ous(const void *k1, const void *k2) {
499         const unit_t *u1 = *((const unit_t **) k1);
500         const unit_t *u2 = *((const unit_t **) k2);
501         int i, o, u1_has_constr, u2_has_constr;
502         arch_register_req_t req;
503         const arch_env_t *aenv = u1->co->aenv;
504
505         /* Units with constraints come first */
506         u1_has_constr = 0;
507         for (i=0; i<u1->node_count; ++i) {
508                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
509                 if (arch_register_req_is(&req, limited)) {
510                         u1_has_constr = 1;
511                         break;
512                 }
513         }
514
515         u2_has_constr = 0;
516         for (i=0; i<u2->node_count; ++i) {
517                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
518                 if (arch_register_req_is(&req, limited)) {
519                         u2_has_constr = 1;
520                         break;
521                 }
522         }
523
524         if (u1_has_constr != u2_has_constr)
525                 return u2_has_constr - u1_has_constr;
526
527         /* Now check, whether the two units are connected */
528 #if 0
529         for (i=0; i<u1->node_count; ++i)
530                 for (o=0; o<u2->node_count; ++o)
531                         if (u1->nodes[i] == u2->nodes[o])
532                                 return 0;
533 #endif
534
535         /* After all, the sort key decides. Greater keys come first. */
536         return u2->sort_key - u1->sort_key;
537
538 }
539
540 /**
541  * Sort the ou's according to constraints and their sort_key
542  */
543 static void co_sort_units(copy_opt_t *co) {
544         int i, count = 0, costs;
545         unit_t *ou, **ous;
546
547         /* get the number of ous, remove them form the list and fill the array */
548         list_for_each_entry(unit_t, ou, &co->units, units)
549                 count++;
550         ous = alloca(count * sizeof(*ous));
551
552         costs = co_get_max_copy_costs(co);
553
554         i = 0;
555         list_for_each_entry(unit_t, ou, &co->units, units)
556                 ous[i++] = ou;
557
558         INIT_LIST_HEAD(&co->units);
559
560         assert(count == i && list_empty(&co->units));
561
562         for (i=0; i<count; ++i)
563                 ir_printf("%+F\n", ous[i]->nodes[0]);
564
565         qsort(ous, count, sizeof(*ous), compare_ous);
566
567         ir_printf("\n\n");
568         for (i=0; i<count; ++i)
569                 ir_printf("%+F\n", ous[i]->nodes[0]);
570
571         /* reinsert into list in correct order */
572         for (i=0; i<count; ++i)
573                 list_add_tail(&ous[i]->units, &co->units);
574
575         assert(costs == co_get_max_copy_costs(co));
576 }
577 #endif
578
579 void co_build_ou_structure(copy_opt_t *co) {
580         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
581         INIT_LIST_HEAD(&co->units);
582         irg_walk_graph(co->irg, co_collect_units, NULL, co);
583 #ifdef QUICK_AND_DIRTY_HACK
584         co_sort_units(co);
585 #endif
586 }
587
588 void co_free_ou_structure(copy_opt_t *co) {
589         unit_t *curr, *tmp;
590         ASSERT_OU_AVAIL(co);
591         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
592                 xfree(curr->nodes);
593                 xfree(curr->costs);
594                 xfree(curr);
595         }
596         co->units.next = NULL;
597 }
598
599 /* co_solve_heuristic() is implemented in becopyheur.c */
600
601 int co_get_max_copy_costs(const copy_opt_t *co) {
602         int i, res = 0;
603         unit_t *curr;
604
605         ASSERT_OU_AVAIL(co);
606
607         list_for_each_entry(unit_t, curr, &co->units, units) {
608                 res += curr->inevitable_costs;
609                 for (i=1; i<curr->node_count; ++i)
610                         res += curr->costs[i];
611         }
612         return res;
613 }
614
615 int co_get_inevit_copy_costs(const copy_opt_t *co) {
616         int res = 0;
617         unit_t *curr;
618
619         ASSERT_OU_AVAIL(co);
620
621         list_for_each_entry(unit_t, curr, &co->units, units)
622                 res += curr->inevitable_costs;
623         return res;
624 }
625
626 int co_get_copy_costs(const copy_opt_t *co) {
627         int i, res = 0;
628         unit_t *curr;
629
630         ASSERT_OU_AVAIL(co);
631
632         list_for_each_entry(unit_t, curr, &co->units, units) {
633                 int root_col = get_irn_col(co, curr->nodes[0]);
634                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
635                 res += curr->inevitable_costs;
636                 for (i=1; i<curr->node_count; ++i) {
637                         int arg_col = get_irn_col(co, curr->nodes[i]);
638                         if (root_col != arg_col) {
639                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
640                                 res += curr->costs[i];
641                         }
642                 }
643         }
644         return res;
645 }
646
647 int co_get_lower_bound(const copy_opt_t *co) {
648         int res = 0;
649         unit_t *curr;
650
651         ASSERT_OU_AVAIL(co);
652
653         list_for_each_entry(unit_t, curr, &co->units, units)
654                 res += curr->inevitable_costs + curr->min_nodes_costs;
655         return res;
656 }
657
658 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
659 {
660         bitset_t *seen = bitset_irg_malloc(co->irg);
661         affinity_node_t *an;
662
663         memset(stat, 0, sizeof(stat[0]));
664
665         /* count affinity edges. */
666         co_gs_foreach_aff_node(co, an) {
667                 neighb_t *neigh;
668                 stat->aff_nodes += 1;
669                 bitset_add_irn(seen, an->irn);
670                 co_gs_foreach_neighb(an, neigh) {
671                         if(!bitset_contains_irn(seen, neigh->irn)) {
672                                 stat->aff_edges += 1;
673                                 stat->max_costs += neigh->costs;
674
675                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
676                                         stat->costs += neigh->costs;
677                                         stat->unsatisfied_edges += 1;
678                                 }
679
680                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
681                                         stat->aff_int += 1;
682                                         stat->inevit_costs += neigh->costs;
683                                 }
684
685                         }
686                 }
687         }
688
689         bitset_free(seen);
690 }
691
692 /******************************************************************************
693    _____                 _        _____ _
694   / ____|               | |      / ____| |
695  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
696  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
697  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
698   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
699                   | |                                       __/ |
700                   |_|                                      |___/
701  ******************************************************************************/
702
703 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
704         const affinity_node_t *n1 = k1;
705         const affinity_node_t *n2 = k2;
706
707         return (n1->irn != n2->irn);
708 }
709
710 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
711         affinity_node_t new_node, *node;
712         neighb_t new_nbr, *nbr;
713         int allocnew;
714
715         new_node.irn        = n1;
716         new_node.degree     = 0;
717         new_node.neighbours = NULL;
718         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
719
720         allocnew = 1;
721         for (nbr = node->neighbours; nbr; nbr = nbr->next)
722                 if (nbr->irn == n2) {
723                         allocnew = 0;
724                         break;
725                 }
726
727         /* if we did not find n2 in n1's neighbourhood insert it */
728         if (allocnew) {
729                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
730                 nbr = obstack_finish(&co->obst);
731                 nbr->irn   = n2;
732                 nbr->costs = 0;
733                 nbr->next  = node->neighbours;
734                 node->neighbours = nbr;
735                 node->degree++;
736         }
737
738         /* now nbr points to n1's neighbour-entry of n2 */
739         nbr->costs += costs;
740 }
741
742 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
743         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
744                 add_edge(co, n1, n2, costs);
745                 add_edge(co, n2, n1, costs);
746         }
747 }
748
749 static void build_graph_walker(ir_node *irn, void *env) {
750         copy_opt_t *co = env;
751         int pos, max;
752         arch_register_req_t req;
753         const arch_register_t *reg;
754
755         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
756                 return;
757
758         reg = arch_get_irn_register(co->aenv, irn);
759         if (arch_register_type_is(reg, ignore))
760                 return;
761
762         /* Phis */
763         if (is_Reg_Phi(irn))
764                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
765                         ir_node *arg = get_irn_n(irn, pos);
766                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
767                 }
768
769         /* Perms */
770         else if (is_Perm_Proj(co->aenv, irn)) {
771                 ir_node *arg = get_Perm_src(irn);
772                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
773         }
774
775         /* 2-address code */
776         else if (is_2addr_code(co->aenv, irn, &req))
777                 add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0));
778 }
779
780 void co_build_graph_structure(copy_opt_t *co) {
781         obstack_init(&co->obst);
782         co->nodes = new_set(compare_affinity_node_t, 32);
783
784         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
785 }
786
787 void co_free_graph_structure(copy_opt_t *co) {
788         ASSERT_GS_AVAIL(co);
789
790         del_set(co->nodes);
791         obstack_free(&co->obst, NULL);
792         co->nodes = NULL;
793 }
794
795 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
796
797 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
798         affinity_node_t new_node, *n;
799
800         ASSERT_GS_AVAIL(co);
801
802         new_node.irn = irn;
803         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
804         if (n) {
805                 return (n->degree > 0);
806         } else
807                 return 0;
808 }
809
810 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
811 {
812         be_ifg_t *ifg  = co->cenv->ifg;
813         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
814         bitset_t *adm  = bitset_alloca(co->cls->n_regs);
815
816         ir_node *irn;
817         void *it, *nit;
818         int i, n, n_regs;
819
820         n_regs = 0;
821         for(i = 0; i < co->cls->n_regs; ++i) {
822                 const arch_register_t *reg = &co->cls->regs[i];
823                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
824         }
825
826         /*
827          * n contains the first node number.
828          * the values below n are the pre-colored register nodes
829          */
830
831         it  = be_ifg_nodes_iter_alloca(ifg);
832         nit = be_ifg_neighbours_iter_alloca(ifg);
833
834         n = n_regs;
835         be_ifg_foreach_node(ifg, it, irn) {
836                 if(!arch_irn_is(co->aenv, irn, ignore))
837                         set_irn_link(irn, INT_TO_PTR(n++));
838         }
839
840         fprintf(f, "%d %d\n", n, n_regs);
841
842         be_ifg_foreach_node(ifg, it, irn) {
843                 if(!arch_irn_is(co->aenv, irn, ignore)) {
844                         int idx            = PTR_TO_INT(get_irn_link(irn));
845                         affinity_node_t *a = get_affinity_info(co, irn);
846
847                         arch_register_req_t req;
848                         ir_node *adj;
849
850                         arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0));
851                         if(arch_register_req_is(&req, limited)) {
852                                 bitset_clear_all(adm);
853                                 req.limited(req.limited_env, adm);
854                                 for(i = 0; i < co->cls->n_regs; ++i)
855                                         if(!bitset_is_set(adm, i) && color_map[i] >= 0)
856                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
857
858                         }
859
860
861                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
862                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
863                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
864                                         if(idx < adj_idx)
865                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
866                                 }
867                         }
868
869                         if(a) {
870                                 neighb_t *n;
871
872                                 co_gs_foreach_neighb(a, n) {
873                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
874                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
875                                                 if(idx < n_idx)
876                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
877                                         }
878                                 }
879                         }
880                 }
881         }
882 }
883
884 typedef struct _appel_clique_walker_t {
885         phase_t ph;
886         const copy_opt_t *co;
887         int curr_nr;
888         int node_count;
889         FILE *f;
890         int dumb;
891         int *color_map;
892         struct obstack obst;
893 } appel_clique_walker_t;
894
895 typedef struct _appel_block_info_t {
896         int *live_end_nr;
897         int *live_in_nr;
898         int *phi_nr;
899         ir_node **live_end;
900         ir_node **live_in;
901         ir_node **phi;
902         int n_live_end;
903         int n_live_in;
904         int n_phi;
905 } appel_block_info_t;
906
907 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
908 {
909 #if 0
910         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
911         int res = (int) freq;
912         return res == 0 ? 1 : res;
913 #else
914         ir_loop *loop = get_irn_loop(bl);
915         if(loop) {
916                 int d = get_loop_depth(loop);
917                 return 1 + d * d;
918         }
919         return 1;
920 #endif
921 }
922
923 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
924 {
925         appel_block_info_t *res = NULL;
926
927         if(is_Block(irn)) {
928                 appel_clique_walker_t *d = (void *) phase;
929                 res = phase_alloc(phase, sizeof(res[0]));
930                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
931                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
932                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
933                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
934                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
935                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
936         }
937
938         return res;
939 }
940
941 typedef struct _insn_list_t {
942         be_insn_t *insn;
943         struct list_head list;
944 } insn_list_t;
945
946 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
947 {
948         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
949         int i;
950
951         for(i = 0; i < bli->n_live_end; ++i)
952                 if(bli->live_end[i] == irn)
953                         return bli->live_end_nr[i];
954
955         return -1;
956 }
957
958 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
959 {
960         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
961         ir_node *irn;
962         int n_live;
963         int j;
964
965         n_live = 0;
966         foreach_pset(live, irn)
967                 live_arr[n_live++] = irn;
968
969         /* dump the live after clique */
970         if(!env->dumb) {
971                 for(j = 0; j < n_live; ++j) {
972                         int k;
973
974                         for(k = j + 1; k < n_live; ++k) {
975                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
976                         }
977                         fprintf(env->f, "\n");
978                 }
979         }
980
981         /* dump the affinities */
982         for(j = 0; !env->dumb && j < n_live; ++j) {
983                 ir_node *irn = live_arr[j];
984                 int old_nr = PTR_TO_INT(get_irn_link(irn));
985
986                 /* if the node was already live in the last insn dump the affinity */
987                 if(old_nr > start_nr) {
988                         int weight = appel_aff_weight(env, bl);
989                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
990                 }
991         }
992
993         /* set the current numbers into the link field. */
994         for(j = 0; j < n_live; ++j) {
995                 ir_node *irn = live_arr[j];
996                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
997         }
998
999         return curr_nr + n_live;
1000 }
1001
1002 static void appel_walker(ir_node *bl, void *data)
1003 {
1004         appel_clique_walker_t *env = data;
1005         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1006         struct obstack *obst       = &env->obst;
1007         void *base                 = obstack_base(obst);
1008         pset *live                 = pset_new_ptr_default();
1009         be_lv_t *lv                = env->co->cenv->birg->lv;
1010
1011         int n_insns  = 0;
1012         int n_nodes  = 0;
1013         int start_nr = env->curr_nr;
1014         int curr_nr  = start_nr;
1015
1016         be_insn_env_t insn_env;
1017         int i, j;
1018         ir_node *irn;
1019         be_insn_t **insns;
1020
1021         insn_env.aenv = env->co->aenv;
1022         insn_env.cls  = env->co->cls;
1023         insn_env.obst = obst;
1024         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1025
1026         /* Guess how many insns will be in this block. */
1027         sched_foreach(bl, irn)
1028                 n_nodes++;
1029
1030         bli->n_phi = 0;
1031         insns = malloc(n_nodes * sizeof(insns[0]));
1032
1033         /* Put all insns in an array. */
1034         irn = sched_first(bl);
1035         while(!sched_is_end(irn)) {
1036                 be_insn_t *insn;
1037                 insn = be_scan_insn(&insn_env, irn);
1038                 insns[n_insns++] = insn;
1039                 irn = insn->next_insn;
1040         }
1041
1042         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
1043         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1044
1045         /* Generate the bad and ugly. */
1046         for(i = n_insns - 1; i >= 0; --i) {
1047                 be_insn_t *insn = insns[i];
1048
1049                 /* The first live set has to be saved in the block border set. */
1050                 if(i == n_insns - 1) {
1051                         j = 0;
1052                         foreach_pset(live, irn) {
1053                                 bli->live_end[j]    = irn;
1054                                 bli->live_end_nr[j] = curr_nr + j;
1055                                 ++j;
1056                         }
1057                         bli->n_live_end = j;
1058                 }
1059
1060                 if(!env->dumb) {
1061                         for(j = 0; j < insn->use_start; ++j) {
1062                                 ir_node *op   = insn->ops[j].carrier;
1063                                 bitset_t *adm = insn->ops[j].regs;
1064                                 int k;
1065                                 int nr;
1066
1067                                 if(!insn->ops[j].has_constraints)
1068                                         continue;
1069
1070                                 nr = 0;
1071                                 foreach_pset(live, irn) {
1072                                         if(irn == op) {
1073                                                 pset_break(live);
1074                                                 break;
1075                                         }
1076                                         ++nr;
1077                                 }
1078
1079                                 assert(nr < pset_count(live));
1080
1081                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1082                                         int mapped_col = env->color_map[k];
1083                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1084                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1085                                 }
1086                         }
1087                 }
1088
1089                 /* dump the clique and update the stuff. */
1090                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1091
1092                 /* remove all defs. */
1093                 for(j = 0; j < insn->use_start; ++j)
1094                         pset_remove_ptr(live, insn->ops[j].carrier);
1095
1096                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1097                         bli->phi[bli->n_phi]    = insn->irn;
1098                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1099                         bli->n_phi++;
1100                 }
1101
1102                 /* add all uses */
1103                 else
1104                         for(j = insn->use_start; j < insn->n_ops; ++j)
1105                                 pset_insert_ptr(live, insn->ops[j].carrier);
1106         }
1107
1108         /* print the start clique. */
1109         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1110
1111         i = 0;
1112         foreach_pset(live, irn) {
1113                 bli->live_in[i]    = irn;
1114                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1115                 ++i;
1116         }
1117         bli->n_live_in = i;
1118
1119         del_pset(live);
1120         free(insns);
1121         obstack_free(obst, base);
1122         env->curr_nr = curr_nr;
1123 }
1124
1125 static void appel_inter_block_aff(ir_node *bl, void *data)
1126 {
1127         appel_clique_walker_t *env = data;
1128         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1129
1130         int i, j, n;
1131
1132         for(i = 0; i < bli->n_live_in; ++i) {
1133                 ir_node *irn = bli->live_in[i];
1134
1135                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1136                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1137
1138                         int nr = appel_get_live_end_nr(env, pred, irn);
1139                         assert(nr >= 0);
1140                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1141                 }
1142         }
1143
1144         for(i = 0; i < bli->n_phi; ++i) {
1145                 ir_node *irn = bli->phi[i];
1146
1147                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1148                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1149                         ir_node *op = get_irn_n(irn, j);
1150
1151                         int nr = appel_get_live_end_nr(env, pred, op);
1152                         assert(nr >= 0);
1153                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1154                 }
1155         }
1156
1157 }
1158
1159 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1160 {
1161         int i;
1162         int n_colors;
1163         appel_clique_walker_t env;
1164         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1165         be_lv_t *lv = co->cenv->birg->lv;
1166
1167         be_liveness_recompute(lv);
1168         obstack_init(&env.obst);
1169         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1170         env.curr_nr = co->cls->n_regs;
1171         env.co = co;
1172         env.f = f;
1173
1174         bitset_copy(adm, co->cenv->ignore_colors);
1175         bitset_flip_all(adm);
1176
1177         /* Make color map. */
1178         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1179         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1180                 const arch_register_t *reg = &co->cls->regs[i];
1181                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1182         }
1183
1184         env.dumb = 1;
1185         env.curr_nr = n_colors;
1186         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1187         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1188
1189         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1190
1191         /* make the first k nodes interfere */
1192         for(i = 0; i < n_colors; ++i) {
1193                 int j;
1194                 for(j = i + 1; j < n_colors; ++j)
1195                         fprintf(f, "%d %d -1 ", i, j);
1196                 fprintf(f, "\n");
1197         }
1198
1199         env.dumb = 0;
1200         env.curr_nr = n_colors;
1201         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1202         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1203         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1204         obstack_free(&env.obst, NULL);
1205 }
1206
1207 /*
1208          ___ _____ ____   ____   ___ _____   ____                        _
1209         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1210          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1211          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1212         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1213                                                                   |_|            |___/
1214 */
1215
1216 static const char *get_dot_color_name(int col)
1217 {
1218         static const char *names[] = {
1219                 "blue",
1220                 "red",
1221                 "green",
1222                 "yellow",
1223                 "cyan",
1224                 "magenta",
1225                 "orange",
1226                 "chocolate",
1227                 "beige",
1228                 "navy",
1229                 "darkgreen",
1230                 "darkred",
1231                 "lightPink",
1232                 "chartreuse",
1233                 "lightskyblue",
1234                 "linen",
1235                 "pink",
1236                 "lightslateblue",
1237                 "mintcream",
1238                 "red",
1239                 "darkolivegreen",
1240                 "mediumblue",
1241                 "mistyrose",
1242                 "salmon",
1243                 "darkseagreen",
1244                 "mediumslateblue"
1245                 "moccasin",
1246                 "tomato",
1247                 "forestgreen",
1248                 "darkturquoise",
1249                 "palevioletred"
1250         };
1251
1252         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1253 }
1254
1255 typedef struct _co_ifg_dump_t {
1256         const copy_opt_t *co;
1257         unsigned flags;
1258 } co_ifg_dump_t;
1259
1260 static void ifg_dump_graph_attr(FILE *f, void *self)
1261 {
1262         fprintf(f, "overlap=scale");
1263 }
1264
1265 static int ifg_is_dump_node(void *self, ir_node *irn)
1266 {
1267         co_ifg_dump_t *cod = self;
1268         return !arch_irn_is(cod->co->aenv, irn, ignore);
1269 }
1270
1271 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1272 {
1273         co_ifg_dump_t *env         = self;
1274         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1275         arch_register_req_t req;
1276         int limited;
1277
1278         arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0));
1279         limited = arch_register_req_is(&req, limited);
1280
1281         if(env->flags & CO_IFG_DUMP_LABELS) {
1282                 ir_fprintf(f, "label=\"%+F", irn);
1283
1284                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1285                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1286                         req.limited(req.limited_env, bs);
1287                         ir_fprintf(f, "\\n%B", bs);
1288                 }
1289                 ir_fprintf(f, "\" ");
1290         }
1291
1292         else
1293                 fprintf(f, "label=\"\" shape=point " );
1294
1295         if(env->flags & CO_IFG_DUMP_SHAPE)
1296                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1297
1298         if(env->flags & CO_IFG_DUMP_COLORS)
1299                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1300 }
1301
1302 static void ifg_dump_at_end(FILE *file, void *self)
1303 {
1304         co_ifg_dump_t *env = self;
1305         affinity_node_t *a;
1306
1307         co_gs_foreach_aff_node(env->co, a) {
1308                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1309                 unsigned aidx = get_irn_idx(a->irn);
1310                 neighb_t *n;
1311
1312                 co_gs_foreach_neighb(a, n) {
1313                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1314                         unsigned nidx = get_irn_idx(n->irn);
1315
1316                         if(aidx < nidx) {
1317                                 const char *color = nr == ar ? "blue" : "red";
1318                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1319                                 if(env->flags & CO_IFG_DUMP_LABELS)
1320                                         fprintf(file, "label=\"%d\" ", n->costs);
1321                                 if(env->flags & CO_IFG_DUMP_COLORS)
1322                                         fprintf(file, "color=%s ", color);
1323                                 else
1324                                         fprintf(file, "style=dotted");
1325                                 fprintf(file, "];\n");
1326                         }
1327                 }
1328         }
1329 }
1330
1331
1332 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1333         ifg_is_dump_node,
1334         ifg_dump_graph_attr,
1335         ifg_dump_node_attr,
1336         NULL,
1337         NULL,
1338         ifg_dump_at_end
1339 };
1340
1341
1342
1343 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1344 {
1345         co_ifg_dump_t cod;
1346
1347         cod.co    = co;
1348         cod.flags = flags;
1349         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1350 }
1351
1352
1353 void co_solve_park_moon(copy_opt_t *opt)
1354 {
1355
1356 }
1357
1358 static int void_algo(copy_opt_t *co)
1359 {
1360         return 0;
1361 }
1362
1363 /*
1364                 _    _                  _ _   _
1365            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1366           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1367          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1368         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1369                            |___/
1370 */
1371
1372 typedef struct {
1373         co_algo_t *algo;
1374         const char *name;
1375         int can_improve_existing;
1376 } co_algo_info_t;
1377
1378 static co_algo_info_t algos[] = {
1379         { void_algo,                "none",  0 },
1380         { co_solve_heuristic,       "heur1", 0 },
1381         { co_solve_heuristic_new,   "heur2", 0 },
1382         { co_solve_heuristic_java,  "heur3", 0 },
1383 #ifdef WITH_ILP
1384         { co_solve_ilp2,            "ilp",   1 },
1385 #endif
1386         { NULL,                     "",      0 }
1387 };
1388
1389 /*
1390     __  __       _         ____       _
1391    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1392    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1393    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1394    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1395
1396 */
1397
1398 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1399 {
1400         FILE *result;
1401         char buf[1024];
1402
1403         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1404         result = fopen(buf, "wt");
1405         if(result == NULL) {
1406                 panic("Couldn't open '%s' for writing.", buf);
1407         }
1408
1409         return result;
1410 }
1411
1412 void co_driver(be_chordal_env_t *cenv)
1413 {
1414         lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
1415         co_complete_stats_t before, after;
1416         copy_opt_t *co;
1417         co_algo_t  *algo_func;
1418         int was_optimal = 0;
1419
1420         if(algo < 0 || algo >= CO_ALGO_LAST)
1421                 return;
1422
1423         co = new_copy_opt(cenv, cost_func);
1424         co_build_ou_structure(co);
1425         co_build_graph_structure(co);
1426
1427         co_complete_stats(co, &before);
1428
1429         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1430         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1431         be_stat_ev_ull("co_max_costs",    before.max_costs);
1432         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1433         be_stat_ev_ull("co_aff_int",      before.aff_int);
1434
1435         be_stat_ev_ull("co_init_costs",   before.costs);
1436         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1437
1438         /* Dump the interference graph in Appel's format. */
1439         if(dump_flags & DUMP_APPEL) {
1440                 FILE *f = my_open(cenv, "", ".apl");
1441                 co_dump_appel_graph(co, f);
1442                 fclose(f);
1443         }
1444
1445         if(dump_flags & DUMP_BEFORE) {
1446                 FILE *f = my_open(cenv, "", "-before.dot");
1447                 co_dump_ifg_dot(co, f, style_flags);
1448                 fclose(f);
1449         }
1450
1451         /* if the algo can improve results, provide an initial solution with heur3 */
1452         if(improve && algos[algo].can_improve_existing) {
1453                 co_complete_stats_t stats;
1454
1455                 /* produce a heuristical solution */
1456                 co_solve_heuristic_java(co);
1457
1458                 /* do the stats and provide the current costs */
1459                 co_complete_stats(co, &stats);
1460                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1461         }
1462
1463 #ifdef WITH_JVM
1464         /* start the JVM here so that it does not tamper the timing. */
1465         if(algo == CO_ALGO_HEUR3)
1466                 be_java_coal_start_jvm();
1467 #endif
1468
1469         algo_func = algos[algo].algo;
1470
1471         lc_timer_reset_and_start(timer);
1472
1473         was_optimal = algo_func(co);
1474
1475         lc_timer_stop(timer);
1476         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1477
1478         be_stat_ev_ull("co_optimal", was_optimal);
1479
1480         if(dump_flags & DUMP_AFTER) {
1481                 FILE *f = my_open(cenv, "", "-after.dot");
1482                 co_dump_ifg_dot(co, f, style_flags);
1483                 fclose(f);
1484         }
1485
1486         co_complete_stats(co, &after);
1487
1488         if(do_stats) {
1489                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1490                 ulong64 evitable          = after.costs     - after.inevit_costs;
1491
1492                 ir_printf("%30F ", cenv->irg);
1493                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1494
1495                 if(optimizable_costs > 0)
1496                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1497                 else
1498                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1499         }
1500
1501         be_stat_ev_ull("co_after_costs", after.costs);
1502         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1503
1504         co_free_graph_structure(co);
1505         co_free_ou_structure(co);
1506         free_copy_opt(co);
1507 }