updated cpu architectures
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110 #ifdef WITH_JVM
111         { "heur3",  CO_ALGO_HEUR3 },
112 #endif /* WITH_JVM */
113         { "heur4",  CO_ALGO_HEUR4 },
114 #ifdef WITH_ILP
115         { "ilp",    CO_ALGO_ILP   },
116 #endif /* WITH_ILP */
117         { NULL,     0 }
118 };
119
120 typedef int (*opt_funcptr)(void);
121
122 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
123         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
124         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
125         { "one",    (opt_funcptr) co_get_costs_all_one },
126         { NULL,     NULL }
127 };
128
129 static lc_opt_enum_mask_var_t dump_var = {
130         &dump_flags, dump_items
131 };
132
133 static lc_opt_enum_mask_var_t style_var = {
134         &style_flags, style_items
135 };
136
137 static lc_opt_enum_mask_var_t algo_var = {
138         &algo, algo_items
139 };
140
141 static lc_opt_enum_func_ptr_var_t cost_func_var = {
142         (opt_funcptr*) &cost_func, cost_func_items
143 };
144
145 static const lc_opt_table_entry_t options[] = {
146         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
147         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
148         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
149         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
150         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
151         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
152         LC_OPT_LAST
153 };
154
155 /* Insert additional options registration functions here. */
156 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
157 extern void be_co2_register_options(lc_opt_entry_t *grp);
158 extern void be_co3_register_options(lc_opt_entry_t *grp);
159
160 void be_init_copycoal(void)
161 {
162         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
163         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
164         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
165         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
166
167         lc_opt_add_table(co_grp, options);
168 }
169
170 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
171
172 #undef QUICK_AND_DIRTY_HACK
173
174 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
175 {
176         if (env->ifg)
177                 return be_ifg_connected(env->ifg, a, b);
178         else
179                 return values_interfere(env->birg, a, b);
180 }
181
182
183 /******************************************************************************
184     _____                           _
185    / ____|                         | |
186   | |  __  ___ _ __   ___ _ __ __ _| |
187   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
188   | |__| |  __/ | | |  __/ | | (_| | |
189    \_____|\___|_| |_|\___|_|  \__,_|_|
190
191  ******************************************************************************/
192
193 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
194
195
196 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
197 {
198         const char *s1, *s2, *s3;
199         int len;
200         copy_opt_t *co;
201
202         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
203
204         co = xcalloc(1, sizeof(*co));
205         co->cenv      = chordal_env;
206         co->aenv      = chordal_env->birg->main_env->arch_env;
207         co->irg       = chordal_env->irg;
208         co->cls       = chordal_env->cls;
209         co->get_costs = get_costs;
210
211         s1 = get_irp_prog_name();
212         s2 = get_entity_name(get_irg_entity(co->irg));
213         s3 = chordal_env->cls->name;
214         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
215         co->name = xmalloc(len);
216         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
217
218         return co;
219 }
220
221 void free_copy_opt(copy_opt_t *co) {
222         xfree(co->name);
223         free(co);
224 }
225
226 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
227         const arch_register_req_t *req;
228         const arch_register_t *reg;
229
230         if (arch_irn_is(co->aenv, irn, ignore))
231                 return 0;
232
233         reg = arch_get_irn_register(co->aenv, irn);
234         if (arch_register_type_is(reg, ignore))
235                 return 0;
236
237         req = arch_get_register_req(co->aenv, irn, -1);
238         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
245         int cost = 0;
246         ir_loop *loop;
247         ir_node *root_block = get_nodes_block(root);
248         (void) co;
249         (void) arg;
250
251         if (is_Phi(root)) {
252                 /* for phis the copies are placed in the corresponding pred-block */
253                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
254         } else {
255                 /* a perm places the copy in the same block as it resides */
256                 loop = get_irn_loop(root_block);
257         }
258         if (loop) {
259                 int d = get_loop_depth(loop);
260                 cost = d*d;
261         }
262         return 1+cost;
263 }
264
265 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
266         int res;
267         ir_node *root_bl = get_nodes_block(root);
268         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
269         (void) arg;
270         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
271
272         /* don't allow values smaller than one. */
273         return res < 1 ? 1 : res;
274 }
275
276
277 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
278         (void) co;
279         (void) root;
280         (void) arg;
281         (void) pos;
282         return 1;
283 }
284
285 /******************************************************************************
286    ____        _   _    _       _ _          _____ _
287   / __ \      | | | |  | |     (_) |        / ____| |
288  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
289  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
290  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
291   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
292         | |                                                            __/ |
293         |_|                                                           |___/
294  ******************************************************************************/
295
296 /**
297  * Determines a maximum weighted independent set with respect to
298  * the interference and conflict edges of all nodes in a qnode.
299  */
300 static int ou_max_ind_set_costs(unit_t *ou) {
301         be_chordal_env_t *chordal_env = ou->co->cenv;
302         ir_node **safe, **unsafe;
303         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
304         bitset_t *curr;
305         bitset_pos_t pos;
306         int max, curr_weight, best_weight = 0;
307
308         /* assign the nodes into two groups.
309          * safe: node has no interference, hence it is in every max stable set.
310          * unsafe: node has an interference
311          */
312         safe = alloca((ou->node_count-1) * sizeof(*safe));
313         safe_costs = 0;
314         safe_count = 0;
315         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
316         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
317         unsafe_count = 0;
318         for(i=1; i<ou->node_count; ++i) {
319                 int is_safe = 1;
320                 for(o=1; o<ou->node_count; ++o) {
321                         if (i==o)
322                                 continue;
323                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
324                                 unsafe_costs[unsafe_count] = ou->costs[i];
325                                 unsafe[unsafe_count] = ou->nodes[i];
326                                 ++unsafe_count;
327                                 is_safe = 0;
328                                 break;
329                         }
330                 }
331                 if (is_safe) {
332                         safe_costs += ou->costs[i];
333                         safe[safe_count++] = ou->nodes[i];
334                 }
335         }
336
337
338         /* now compute the best set out of the unsafe nodes*/
339         if (unsafe_count > MIS_HEUR_TRIGGER) {
340                 bitset_t *best = bitset_alloca(unsafe_count);
341                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
342                 for (i=0; i<unsafe_count; ++i) {
343                         bitset_set(best, i);
344                         /* check if it is a stable set */
345                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
346                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
347                                         bitset_clear(best, i); /* clear the bit and try next one */
348                                         break;
349                                 }
350                 }
351                 /* compute the weight */
352                 bitset_foreach(best, pos)
353                         best_weight += unsafe_costs[pos];
354         } else {
355                 /* Exact Algorithm: Brute force */
356                 curr = bitset_alloca(unsafe_count);
357                 bitset_set_all(curr);
358                 while ((max = bitset_popcnt(curr)) != 0) {
359                         /* check if curr is a stable set */
360                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
361                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
362                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
363                                                         goto no_stable_set;
364
365                         /* if we arrive here, we have a stable set */
366                         /* compute the weigth of the stable set*/
367                         curr_weight = 0;
368                         bitset_foreach(curr, pos)
369                                 curr_weight += unsafe_costs[pos];
370
371                         /* any better ? */
372                         if (curr_weight > best_weight) {
373                                 best_weight = curr_weight;
374                         }
375
376         no_stable_set:
377                         bitset_minus1(curr);
378                 }
379         }
380
381         return safe_costs+best_weight;
382 }
383
384 static void co_collect_units(ir_node *irn, void *env) {
385         copy_opt_t *co = env;
386         unit_t *unit;
387
388         if (!is_curr_reg_class(co, irn))
389                 return;
390         if (!co_is_optimizable_root(co, irn))
391                 return;
392
393         /* Init a new unit */
394         unit = xcalloc(1, sizeof(*unit));
395         unit->co = co;
396         unit->node_count = 1;
397         INIT_LIST_HEAD(&unit->queue);
398
399         /* Phi with some/all of its arguments */
400         if (is_Reg_Phi(irn)) {
401                 int i, arity;
402
403                 /* init */
404                 arity = get_irn_arity(irn);
405                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
406                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
407                 unit->nodes[0] = irn;
408
409                 /* fill */
410                 for (i=0; i<arity; ++i) {
411                         int o, arg_pos;
412                         ir_node *arg = get_irn_n(irn, i);
413
414                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
415                         if (arg == irn)
416                                 continue;
417                         if (nodes_interfere(co->cenv, irn, arg)) {
418                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
419                                 continue;
420                         }
421
422                         /* Else insert the argument of the phi to the members of this ou */
423                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
424
425                         if (! arch_irn_is(co->aenv, arg, ignore)) {
426                                 /* Check if arg has occurred at a prior position in the arg/list */
427                                 arg_pos = 0;
428                                 for (o=1; o<unit->node_count; ++o) {
429                                         if (unit->nodes[o] == arg) {
430                                                 arg_pos = o;
431                                                 break;
432                                         }
433                                 }
434
435                                 if (!arg_pos) { /* a new argument */
436                                         /* insert node, set costs */
437                                         unit->nodes[unit->node_count] = arg;
438                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
439                                         unit->node_count++;
440                                 } else { /* arg has occurred before in same phi */
441                                         /* increase costs for existing arg */
442                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
443                                 }
444                         }
445                 }
446                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
447                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
448         } else if (is_Perm_Proj(co->aenv, irn)) {
449                 /* Proj of a perm with corresponding arg */
450                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
451                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
452                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
453                 unit->node_count = 2;
454                 unit->nodes[0] = irn;
455                 unit->nodes[1] = get_Perm_src(irn);
456                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
457         } else {
458                 const arch_register_req_t *req =
459                         arch_get_register_req(co->aenv, irn, -1);
460
461                 /* Src == Tgt of a 2-addr-code instruction */
462                 if (is_2addr_code(req)) {
463                         const unsigned other = req->other_same;
464                         int            count = 0;
465                         int            i;
466
467                         for (i = 0; (1U << i) <= other; ++i) {
468                                 if (other & (1U << i)) {
469                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
470                                         if (!arch_irn_is(co->aenv, o, ignore) &&
471                                                         !nodes_interfere(co->cenv, irn, o)) {
472                                                 ++count;
473                                         }
474                                 }
475                         }
476
477                         if (count != 0) {
478                                 int k = 0;
479                                 ++count;
480                                 unit->nodes = xmalloc(count * sizeof(*unit->nodes));
481                                 unit->costs = xmalloc(count * sizeof(*unit->costs));
482                                 unit->node_count = count;
483                                 unit->nodes[k++] = irn;
484
485                                 for (i = 0; 1U << i <= other; ++i) {
486                                         if (other & (1U << i)) {
487                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
488                                                 if (!arch_irn_is(co->aenv, o, ignore) &&
489                                                                 !nodes_interfere(co->cenv, irn, o)) {
490                                                         unit->nodes[k] = o;
491                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
492                                                         ++k;
493                                                 }
494                                         }
495                                 }
496                         }
497                 } else {
498                         assert(0 && "This is not an optimizable node!");
499                 }
500         }
501
502         /* Insert the new unit at a position according to its costs */
503         if (unit->node_count > 1) {
504                 int i;
505                 struct list_head *tmp;
506
507                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
508                 for(i=1; i<unit->node_count; ++i) {
509                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
510                         unit->all_nodes_costs += unit->costs[i];
511                 }
512
513                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
514                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
515                 /* Insert the new ou according to its sort_key */
516                 tmp = &co->units;
517                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
518                         tmp = tmp->next;
519                 list_add(&unit->units, tmp);
520         } else {
521                 free(unit);
522         }
523 }
524
525 #ifdef QUICK_AND_DIRTY_HACK
526
527 static int compare_ous(const void *k1, const void *k2) {
528         const unit_t *u1 = *((const unit_t **) k1);
529         const unit_t *u2 = *((const unit_t **) k2);
530         int i, o, u1_has_constr, u2_has_constr;
531         arch_register_req_t req;
532         const arch_env_t *aenv = u1->co->aenv;
533
534         /* Units with constraints come first */
535         u1_has_constr = 0;
536         for (i=0; i<u1->node_count; ++i) {
537                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
538                 if (arch_register_req_is(&req, limited)) {
539                         u1_has_constr = 1;
540                         break;
541                 }
542         }
543
544         u2_has_constr = 0;
545         for (i=0; i<u2->node_count; ++i) {
546                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
547                 if (arch_register_req_is(&req, limited)) {
548                         u2_has_constr = 1;
549                         break;
550                 }
551         }
552
553         if (u1_has_constr != u2_has_constr)
554                 return u2_has_constr - u1_has_constr;
555
556         /* Now check, whether the two units are connected */
557 #if 0
558         for (i=0; i<u1->node_count; ++i)
559                 for (o=0; o<u2->node_count; ++o)
560                         if (u1->nodes[i] == u2->nodes[o])
561                                 return 0;
562 #endif
563
564         /* After all, the sort key decides. Greater keys come first. */
565         return u2->sort_key - u1->sort_key;
566
567 }
568
569 /**
570  * Sort the ou's according to constraints and their sort_key
571  */
572 static void co_sort_units(copy_opt_t *co) {
573         int i, count = 0, costs;
574         unit_t *ou, **ous;
575
576         /* get the number of ous, remove them form the list and fill the array */
577         list_for_each_entry(unit_t, ou, &co->units, units)
578                 count++;
579         ous = alloca(count * sizeof(*ous));
580
581         costs = co_get_max_copy_costs(co);
582
583         i = 0;
584         list_for_each_entry(unit_t, ou, &co->units, units)
585                 ous[i++] = ou;
586
587         INIT_LIST_HEAD(&co->units);
588
589         assert(count == i && list_empty(&co->units));
590
591         for (i=0; i<count; ++i)
592                 ir_printf("%+F\n", ous[i]->nodes[0]);
593
594         qsort(ous, count, sizeof(*ous), compare_ous);
595
596         ir_printf("\n\n");
597         for (i=0; i<count; ++i)
598                 ir_printf("%+F\n", ous[i]->nodes[0]);
599
600         /* reinsert into list in correct order */
601         for (i=0; i<count; ++i)
602                 list_add_tail(&ous[i]->units, &co->units);
603
604         assert(costs == co_get_max_copy_costs(co));
605 }
606 #endif
607
608 void co_build_ou_structure(copy_opt_t *co) {
609         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
610         INIT_LIST_HEAD(&co->units);
611         irg_walk_graph(co->irg, co_collect_units, NULL, co);
612 #ifdef QUICK_AND_DIRTY_HACK
613         co_sort_units(co);
614 #endif
615 }
616
617 void co_free_ou_structure(copy_opt_t *co) {
618         unit_t *curr, *tmp;
619         ASSERT_OU_AVAIL(co);
620         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
621                 xfree(curr->nodes);
622                 xfree(curr->costs);
623                 xfree(curr);
624         }
625         co->units.next = NULL;
626 }
627
628 /* co_solve_heuristic() is implemented in becopyheur.c */
629
630 int co_get_max_copy_costs(const copy_opt_t *co) {
631         int i, res = 0;
632         unit_t *curr;
633
634         ASSERT_OU_AVAIL(co);
635
636         list_for_each_entry(unit_t, curr, &co->units, units) {
637                 res += curr->inevitable_costs;
638                 for (i=1; i<curr->node_count; ++i)
639                         res += curr->costs[i];
640         }
641         return res;
642 }
643
644 int co_get_inevit_copy_costs(const copy_opt_t *co) {
645         int res = 0;
646         unit_t *curr;
647
648         ASSERT_OU_AVAIL(co);
649
650         list_for_each_entry(unit_t, curr, &co->units, units)
651                 res += curr->inevitable_costs;
652         return res;
653 }
654
655 int co_get_copy_costs(const copy_opt_t *co) {
656         int i, res = 0;
657         unit_t *curr;
658
659         ASSERT_OU_AVAIL(co);
660
661         list_for_each_entry(unit_t, curr, &co->units, units) {
662                 int root_col = get_irn_col(co, curr->nodes[0]);
663                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
664                 res += curr->inevitable_costs;
665                 for (i=1; i<curr->node_count; ++i) {
666                         int arg_col = get_irn_col(co, curr->nodes[i]);
667                         if (root_col != arg_col) {
668                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
669                                 res += curr->costs[i];
670                         }
671                 }
672         }
673         return res;
674 }
675
676 int co_get_lower_bound(const copy_opt_t *co) {
677         int res = 0;
678         unit_t *curr;
679
680         ASSERT_OU_AVAIL(co);
681
682         list_for_each_entry(unit_t, curr, &co->units, units)
683                 res += curr->inevitable_costs + curr->min_nodes_costs;
684         return res;
685 }
686
687 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
688 {
689         bitset_t *seen = bitset_irg_malloc(co->irg);
690         affinity_node_t *an;
691
692         memset(stat, 0, sizeof(stat[0]));
693
694         /* count affinity edges. */
695         co_gs_foreach_aff_node(co, an) {
696                 neighb_t *neigh;
697                 stat->aff_nodes += 1;
698                 bitset_add_irn(seen, an->irn);
699                 co_gs_foreach_neighb(an, neigh) {
700                         if(!bitset_contains_irn(seen, neigh->irn)) {
701                                 stat->aff_edges += 1;
702                                 stat->max_costs += neigh->costs;
703
704                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
705                                         stat->costs += neigh->costs;
706                                         stat->unsatisfied_edges += 1;
707                                 }
708
709                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
710                                         stat->aff_int += 1;
711                                         stat->inevit_costs += neigh->costs;
712                                 }
713
714                         }
715                 }
716         }
717
718         bitset_free(seen);
719 }
720
721 /******************************************************************************
722    _____                 _        _____ _
723   / ____|               | |      / ____| |
724  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
725  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
726  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
727   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
728                   | |                                       __/ |
729                   |_|                                      |___/
730  ******************************************************************************/
731
732 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
733         const affinity_node_t *n1 = k1;
734         const affinity_node_t *n2 = k2;
735         (void) size;
736
737         return (n1->irn != n2->irn);
738 }
739
740 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
741         affinity_node_t new_node, *node;
742         neighb_t        *nbr;
743         int             allocnew = 1;
744
745         new_node.irn        = n1;
746         new_node.degree     = 0;
747         new_node.neighbours = NULL;
748         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
749
750         for (nbr = node->neighbours; nbr; nbr = nbr->next)
751                 if (nbr->irn == n2) {
752                         allocnew = 0;
753                         break;
754                 }
755
756         /* if we did not find n2 in n1's neighbourhood insert it */
757         if (allocnew) {
758                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
759                 nbr->irn   = n2;
760                 nbr->costs = 0;
761                 nbr->next  = node->neighbours;
762
763                 node->neighbours = nbr;
764                 node->degree++;
765         }
766
767         /* now nbr points to n1's neighbour-entry of n2 */
768         nbr->costs += costs;
769 }
770
771 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
772         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
773                 add_edge(co, n1, n2, costs);
774                 add_edge(co, n2, n1, costs);
775         }
776 }
777
778 static void build_graph_walker(ir_node *irn, void *env) {
779         copy_opt_t *co = env;
780         int pos, max;
781         const arch_register_t *reg;
782
783         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
784                 return;
785
786         reg = arch_get_irn_register(co->aenv, irn);
787         if (arch_register_type_is(reg, ignore))
788                 return;
789
790         if (is_Reg_Phi(irn)) { /* Phis */
791                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
792                         ir_node *arg = get_irn_n(irn, pos);
793                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
794                 }
795         }
796         else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
797                 ir_node *arg = get_Perm_src(irn);
798                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
799         }
800         else { /* 2-address code */
801                 const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
802                 if (is_2addr_code(req)) {
803                         const unsigned other = req->other_same;
804                         int i;
805
806                         for (i = 0; 1U << i <= other; ++i) {
807                                 if (other & (1U << i)) {
808                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
809                                         if (! arch_irn_is(co->aenv, other, ignore))
810                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
811                                 }
812                         }
813                 }
814         }
815 }
816
817 void co_build_graph_structure(copy_opt_t *co) {
818         obstack_init(&co->obst);
819         co->nodes = new_set(compare_affinity_node_t, 32);
820
821         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
822 }
823
824 void co_free_graph_structure(copy_opt_t *co) {
825         ASSERT_GS_AVAIL(co);
826
827         del_set(co->nodes);
828         obstack_free(&co->obst, NULL);
829         co->nodes = NULL;
830 }
831
832 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
833
834 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
835         affinity_node_t new_node, *n;
836
837         ASSERT_GS_AVAIL(co);
838
839         new_node.irn = irn;
840         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
841         if (n) {
842                 return (n->degree > 0);
843         } else
844                 return 0;
845 }
846
847 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
848 {
849         be_ifg_t *ifg  = co->cenv->ifg;
850         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
851
852         ir_node *irn;
853         void *it, *nit;
854         int n, n_regs;
855         unsigned i;
856
857         n_regs = 0;
858         for(i = 0; i < co->cls->n_regs; ++i) {
859                 const arch_register_t *reg = &co->cls->regs[i];
860                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
861         }
862
863         /*
864          * n contains the first node number.
865          * the values below n are the pre-colored register nodes
866          */
867
868         it  = be_ifg_nodes_iter_alloca(ifg);
869         nit = be_ifg_neighbours_iter_alloca(ifg);
870
871         n = n_regs;
872         be_ifg_foreach_node(ifg, it, irn) {
873                 if(!arch_irn_is(co->aenv, irn, ignore))
874                         set_irn_link(irn, INT_TO_PTR(n++));
875         }
876
877         fprintf(f, "%d %d\n", n, n_regs);
878
879         be_ifg_foreach_node(ifg, it, irn) {
880                 if(!arch_irn_is(co->aenv, irn, ignore)) {
881                         int idx            = PTR_TO_INT(get_irn_link(irn));
882                         affinity_node_t *a = get_affinity_info(co, irn);
883
884                         const arch_register_req_t *req;
885                         ir_node *adj;
886
887                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
888                         if(arch_register_req_is(req, limited)) {
889                                 for(i = 0; i < co->cls->n_regs; ++i) {
890                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
891                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
892                                 }
893                         }
894
895
896                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
897                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
898                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
899                                         if(idx < adj_idx)
900                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
901                                 }
902                         }
903
904                         if(a) {
905                                 neighb_t *n;
906
907                                 co_gs_foreach_neighb(a, n) {
908                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
909                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
910                                                 if(idx < n_idx)
911                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
912                                         }
913                                 }
914                         }
915                 }
916         }
917 }
918
919 typedef struct _appel_clique_walker_t {
920         ir_phase ph;
921         const copy_opt_t *co;
922         int curr_nr;
923         int node_count;
924         FILE *f;
925         int dumb;
926         int *color_map;
927         struct obstack obst;
928 } appel_clique_walker_t;
929
930 typedef struct _appel_block_info_t {
931         int *live_end_nr;
932         int *live_in_nr;
933         int *phi_nr;
934         ir_node **live_end;
935         ir_node **live_in;
936         ir_node **phi;
937         int n_live_end;
938         int n_live_in;
939         int n_phi;
940 } appel_block_info_t;
941
942 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
943 {
944 #if 0
945         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
946         int res = (int) freq;
947         return res == 0 ? 1 : res;
948 #else
949         ir_loop *loop = get_irn_loop(bl);
950         (void) env;
951         if(loop) {
952                 int d = get_loop_depth(loop);
953                 return 1 + d * d;
954         }
955         return 1;
956 #endif
957 }
958
959 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
960 {
961         appel_block_info_t *res = NULL;
962         (void) old;
963
964         if(is_Block(irn)) {
965                 appel_clique_walker_t *d = (void *) phase;
966                 res = phase_alloc(phase, sizeof(res[0]));
967                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
968                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
969                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
970                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
971                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
972                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
973         }
974
975         return res;
976 }
977
978 typedef struct _insn_list_t {
979         be_insn_t *insn;
980         struct list_head list;
981 } insn_list_t;
982
983 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
984 {
985         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
986         int i;
987
988         for(i = 0; i < bli->n_live_end; ++i)
989                 if(bli->live_end[i] == irn)
990                         return bli->live_end_nr[i];
991
992         return -1;
993 }
994
995 static int appel_dump_clique(appel_clique_walker_t *env, const ir_nodeset_t *live, ir_node *bl, int curr_nr, int start_nr)
996 {
997         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
998         ir_node *irn;
999         int n_live;
1000         int j;
1001         ir_nodeset_iterator_t iter;
1002
1003         n_live = 0;
1004         foreach_ir_nodeset(live, irn, iter)
1005                 live_arr[n_live++] = irn;
1006
1007         /* dump the live after clique */
1008         if(!env->dumb) {
1009                 for(j = 0; j < n_live; ++j) {
1010                         int k;
1011
1012                         for(k = j + 1; k < n_live; ++k) {
1013                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
1014                         }
1015                         fprintf(env->f, "\n");
1016                 }
1017         }
1018
1019         /* dump the affinities */
1020         for(j = 0; !env->dumb && j < n_live; ++j) {
1021                 ir_node *irn = live_arr[j];
1022                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1023
1024                 /* if the node was already live in the last insn dump the affinity */
1025                 if(old_nr > start_nr) {
1026                         int weight = appel_aff_weight(env, bl);
1027                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1028                 }
1029         }
1030
1031         /* set the current numbers into the link field. */
1032         for(j = 0; j < n_live; ++j) {
1033                 ir_node *irn = live_arr[j];
1034                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1035         }
1036
1037         return curr_nr + n_live;
1038 }
1039
1040 static void appel_walker(ir_node *bl, void *data)
1041 {
1042         appel_clique_walker_t *env = data;
1043         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1044         struct obstack *obst       = &env->obst;
1045         void *base                 = obstack_base(obst);
1046         ir_nodeset_t live;
1047         ir_nodeset_iterator_t iter;
1048         be_lv_t *lv                = env->co->cenv->birg->lv;
1049
1050         int n_insns  = 0;
1051         int n_nodes  = 0;
1052         int start_nr = env->curr_nr;
1053         int curr_nr  = start_nr;
1054
1055         be_insn_env_t insn_env;
1056         int i, j;
1057         ir_node *irn;
1058         be_insn_t **insns;
1059
1060         insn_env.aenv = env->co->aenv;
1061         insn_env.cls  = env->co->cls;
1062         insn_env.obst = obst;
1063         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1064
1065         /* Guess how many insns will be in this block. */
1066         sched_foreach(bl, irn)
1067                 n_nodes++;
1068
1069         bli->n_phi = 0;
1070         insns = xmalloc(n_nodes * sizeof(insns[0]));
1071
1072         /* Put all insns in an array. */
1073         irn = sched_first(bl);
1074         while(!sched_is_end(irn)) {
1075                 be_insn_t *insn;
1076                 insn = be_scan_insn(&insn_env, irn);
1077                 insns[n_insns++] = insn;
1078                 irn = insn->next_insn;
1079         }
1080
1081         DBG((dbg, LEVEL_2, "%+F\n", bl));
1082         ir_nodeset_init(&live);
1083         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, &live);
1084
1085         /* Generate the bad and ugly. */
1086         for(i = n_insns - 1; i >= 0; --i) {
1087                 be_insn_t *insn = insns[i];
1088
1089                 /* The first live set has to be saved in the block border set. */
1090                 if(i == n_insns - 1) {
1091                         j = 0;
1092                         foreach_ir_nodeset(&live, irn, iter) {
1093                                 bli->live_end[j]    = irn;
1094                                 bli->live_end_nr[j] = curr_nr + j;
1095                                 ++j;
1096                         }
1097                         bli->n_live_end = j;
1098                 }
1099
1100                 if(!env->dumb) {
1101                         for(j = 0; j < insn->use_start; ++j) {
1102                                 ir_node *op   = insn->ops[j].carrier;
1103                                 bitset_t *adm = insn->ops[j].regs;
1104                                 unsigned k;
1105                                 size_t nr;
1106
1107                                 if(!insn->ops[j].has_constraints)
1108                                         continue;
1109
1110                                 nr = 0;
1111                                 foreach_ir_nodeset(&live, irn, iter) {
1112                                         if(irn == op) {
1113                                                 break;
1114                                         }
1115                                         ++nr;
1116                                 }
1117
1118                                 assert(nr < ir_nodeset_size(&live));
1119
1120                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1121                                         int mapped_col = env->color_map[k];
1122                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1123                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1124                                 }
1125                         }
1126                 }
1127
1128                 /* dump the clique and update the stuff. */
1129                 curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1130
1131                 /* remove all defs. */
1132                 for(j = 0; j < insn->use_start; ++j)
1133                         ir_nodeset_remove(&live, insn->ops[j].carrier);
1134
1135                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1136                         bli->phi[bli->n_phi]    = insn->irn;
1137                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1138                         bli->n_phi++;
1139                 }
1140
1141                 /* add all uses */
1142                 else
1143                         for(j = insn->use_start; j < insn->n_ops; ++j)
1144                                 ir_nodeset_insert(&live, insn->ops[j].carrier);
1145         }
1146
1147         /* print the start clique. */
1148         curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1149
1150         i = 0;
1151         foreach_ir_nodeset(&live, irn, iter) {
1152                 bli->live_in[i]    = irn;
1153                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1154                 ++i;
1155         }
1156         bli->n_live_in = i;
1157
1158         ir_nodeset_destroy(&live);
1159         free(insns);
1160         obstack_free(obst, base);
1161         env->curr_nr = curr_nr;
1162 }
1163
1164 static void appel_inter_block_aff(ir_node *bl, void *data)
1165 {
1166         appel_clique_walker_t *env = data;
1167         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1168
1169         int i, j, n;
1170
1171         for(i = 0; i < bli->n_live_in; ++i) {
1172                 ir_node *irn = bli->live_in[i];
1173
1174                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1175                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1176
1177                         int nr = appel_get_live_end_nr(env, pred, irn);
1178                         assert(nr >= 0);
1179                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1180                 }
1181         }
1182
1183         for(i = 0; i < bli->n_phi; ++i) {
1184                 ir_node *irn = bli->phi[i];
1185
1186                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1187                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1188                         ir_node *op = get_irn_n(irn, j);
1189
1190                         int nr = appel_get_live_end_nr(env, pred, op);
1191                         assert(nr >= 0);
1192                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1193                 }
1194         }
1195
1196 }
1197
1198 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1199 {
1200         unsigned i;
1201         unsigned n_colors;
1202         appel_clique_walker_t env;
1203         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1204         be_lv_t *lv = co->cenv->birg->lv;
1205
1206         be_liveness_recompute(lv);
1207         obstack_init(&env.obst);
1208         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1209         env.curr_nr = co->cls->n_regs;
1210         env.co = co;
1211         env.f = f;
1212
1213         bitset_copy(adm, co->cenv->ignore_colors);
1214         bitset_flip_all(adm);
1215
1216         /* Make color map. */
1217         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1218         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1219                 const arch_register_t *reg = &co->cls->regs[i];
1220                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : (int) n_colors++;
1221         }
1222
1223         env.dumb = 1;
1224         env.curr_nr = n_colors;
1225         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1226         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1227
1228         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1229
1230         /* make the first k nodes interfere */
1231         for(i = 0; i < n_colors; ++i) {
1232                 unsigned j;
1233                 for(j = i + 1; j < n_colors; ++j)
1234                         fprintf(f, "%d %d -1 ", i, j);
1235                 fprintf(f, "\n");
1236         }
1237
1238         env.dumb = 0;
1239         env.curr_nr = n_colors;
1240         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1241         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1242         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1243         obstack_free(&env.obst, NULL);
1244 }
1245
1246 /*
1247          ___ _____ ____   ____   ___ _____   ____                        _
1248         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1249          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1250          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1251         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1252                                                                   |_|            |___/
1253 */
1254
1255 static const char *get_dot_color_name(size_t col)
1256 {
1257         static const char *names[] = {
1258                 "blue",
1259                 "red",
1260                 "green",
1261                 "yellow",
1262                 "cyan",
1263                 "magenta",
1264                 "orange",
1265                 "chocolate",
1266                 "beige",
1267                 "navy",
1268                 "darkgreen",
1269                 "darkred",
1270                 "lightPink",
1271                 "chartreuse",
1272                 "lightskyblue",
1273                 "linen",
1274                 "pink",
1275                 "lightslateblue",
1276                 "mintcream",
1277                 "red",
1278                 "darkolivegreen",
1279                 "mediumblue",
1280                 "mistyrose",
1281                 "salmon",
1282                 "darkseagreen",
1283                 "mediumslateblue"
1284                 "moccasin",
1285                 "tomato",
1286                 "forestgreen",
1287                 "darkturquoise",
1288                 "palevioletred"
1289         };
1290
1291         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1292 }
1293
1294 typedef struct _co_ifg_dump_t {
1295         const copy_opt_t *co;
1296         unsigned flags;
1297 } co_ifg_dump_t;
1298
1299 static void ifg_dump_graph_attr(FILE *f, void *self)
1300 {
1301         (void) self;
1302         fprintf(f, "overlap=scale");
1303 }
1304
1305 static int ifg_is_dump_node(void *self, ir_node *irn)
1306 {
1307         co_ifg_dump_t *cod = self;
1308         return !arch_irn_is(cod->co->aenv, irn, ignore);
1309 }
1310
1311 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1312 {
1313         co_ifg_dump_t *env         = self;
1314         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1315         const arch_register_req_t *req;
1316         int limited;
1317
1318         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1319         limited = arch_register_req_is(req, limited);
1320
1321         if(env->flags & CO_IFG_DUMP_LABELS) {
1322                 ir_fprintf(f, "label=\"%+F", irn);
1323
1324 #if 0
1325                 // TODO fix this...
1326                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1327                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1328                         req.limited(req.limited_env, bs);
1329                         ir_fprintf(f, "\\n%B", bs);
1330                 }
1331 #endif
1332                 ir_fprintf(f, "\" ");
1333         } else {
1334                 fprintf(f, "label=\"\" shape=point " );
1335         }
1336
1337         if(env->flags & CO_IFG_DUMP_SHAPE)
1338                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1339
1340         if(env->flags & CO_IFG_DUMP_COLORS)
1341                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1342 }
1343
1344 static void ifg_dump_at_end(FILE *file, void *self)
1345 {
1346         co_ifg_dump_t *env = self;
1347         affinity_node_t *a;
1348
1349         co_gs_foreach_aff_node(env->co, a) {
1350                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1351                 unsigned aidx = get_irn_idx(a->irn);
1352                 neighb_t *n;
1353
1354                 co_gs_foreach_neighb(a, n) {
1355                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1356                         unsigned nidx = get_irn_idx(n->irn);
1357
1358                         if(aidx < nidx) {
1359                                 const char *color = nr == ar ? "blue" : "red";
1360                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1361                                 if(env->flags & CO_IFG_DUMP_LABELS)
1362                                         fprintf(file, "label=\"%d\" ", n->costs);
1363                                 if(env->flags & CO_IFG_DUMP_COLORS)
1364                                         fprintf(file, "color=%s ", color);
1365                                 else
1366                                         fprintf(file, "style=dotted");
1367                                 fprintf(file, "];\n");
1368                         }
1369                 }
1370         }
1371 }
1372
1373
1374 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1375         ifg_is_dump_node,
1376         ifg_dump_graph_attr,
1377         ifg_dump_node_attr,
1378         NULL,
1379         NULL,
1380         ifg_dump_at_end
1381 };
1382
1383
1384
1385 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1386 {
1387         co_ifg_dump_t cod;
1388
1389         cod.co    = co;
1390         cod.flags = flags;
1391         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1392 }
1393
1394
1395 void co_solve_park_moon(copy_opt_t *opt)
1396 {
1397         (void) opt;
1398 }
1399
1400 static int void_algo(copy_opt_t *co)
1401 {
1402         (void) co;
1403         return 0;
1404 }
1405
1406 /*
1407                 _    _                  _ _   _
1408            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1409           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1410          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1411         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1412                            |___/
1413 */
1414
1415 typedef struct {
1416         co_algo_t  *algo;
1417         const char *name;
1418         int        can_improve_existing;
1419 } co_algo_info_t;
1420
1421 static co_algo_info_t algos[] = {
1422         { void_algo,               "none",  0 },
1423         { co_solve_heuristic,      "heur1", 0 },
1424         { co_solve_heuristic_new,  "heur2", 0 },
1425         { co_solve_heuristic_java, "heur3", 0 },
1426         { co_solve_heuristic_mst,  "heur4", 0 },
1427 #ifdef WITH_ILP
1428         { co_solve_ilp2,           "ilp",   1 },
1429 #endif
1430         { NULL,                    "",      0 }
1431 };
1432
1433 /*
1434     __  __       _         ____       _
1435    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1436    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1437    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1438    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1439
1440 */
1441
1442 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1443 {
1444         FILE *result;
1445         char buf[1024];
1446
1447         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1448         result = fopen(buf, "wt");
1449         if(result == NULL) {
1450                 panic("Couldn't open '%s' for writing.", buf);
1451         }
1452
1453         return result;
1454 }
1455
1456 void co_driver(be_chordal_env_t *cenv)
1457 {
1458         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1459         co_complete_stats_t before, after;
1460         copy_opt_t          *co;
1461         co_algo_t           *algo_func;
1462         int                 was_optimal = 0;
1463
1464         if (algo >= CO_ALGO_LAST)
1465                 return;
1466
1467         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1468
1469         co = new_copy_opt(cenv, cost_func);
1470         co_build_ou_structure(co);
1471         co_build_graph_structure(co);
1472
1473         co_complete_stats(co, &before);
1474
1475         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1476         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1477         be_stat_ev_ull("co_max_costs",    before.max_costs);
1478         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1479         be_stat_ev_ull("co_aff_int",      before.aff_int);
1480
1481         be_stat_ev_ull("co_init_costs",   before.costs);
1482         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1483
1484         /* Dump the interference graph in Appel's format. */
1485         if (dump_flags & DUMP_APPEL) {
1486                 FILE *f = my_open(cenv, "", ".apl");
1487                 co_dump_appel_graph(co, f);
1488                 fclose(f);
1489         }
1490
1491         if (dump_flags & DUMP_BEFORE) {
1492                 FILE *f = my_open(cenv, "", "-before.dot");
1493                 co_dump_ifg_dot(co, f, style_flags);
1494                 fclose(f);
1495         }
1496
1497         /* if the algo can improve results, provide an initial solution with heur3 */
1498         if (improve && algos[algo].can_improve_existing) {
1499                 co_complete_stats_t stats;
1500
1501                 /* produce a heuristic solution */
1502 #ifdef WITH_JVM
1503                 co_solve_heuristic_java(co);
1504 #else
1505                 co_solve_heuristic(co);
1506 #endif /* WITH_JVM */
1507
1508                 /* do the stats and provide the current costs */
1509                 co_complete_stats(co, &stats);
1510                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1511         }
1512
1513 #ifdef WITH_JVM
1514         /* start the JVM here so that it does not tamper the timing. */
1515         if (algo == CO_ALGO_HEUR3)
1516                 be_java_coal_start_jvm();
1517 #endif /* WITH_JVM */
1518
1519         algo_func = algos[algo].algo;
1520
1521         /* perform actual copy minimization */
1522         lc_timer_reset_and_start(timer);
1523         was_optimal = algo_func(co);
1524         lc_timer_stop(timer);
1525
1526         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1527         be_stat_ev_ull("co_optimal", was_optimal);
1528
1529         if (dump_flags & DUMP_AFTER) {
1530                 FILE *f = my_open(cenv, "", "-after.dot");
1531                 co_dump_ifg_dot(co, f, style_flags);
1532                 fclose(f);
1533         }
1534
1535         co_complete_stats(co, &after);
1536
1537         if (do_stats) {
1538                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1539                 ulong64 evitable          = after.costs     - after.inevit_costs;
1540
1541                 ir_printf("%30F ", cenv->irg);
1542                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1543
1544                 if(optimizable_costs > 0)
1545                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1546                 else
1547                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1548         }
1549
1550         be_stat_ev_ull("co_after_costs", after.costs);
1551         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1552
1553         co_free_graph_structure(co);
1554         co_free_ou_structure(co);
1555         free_copy_opt(co);
1556 }