8464541ab8db977674d55f8c4f1bb0fea1f612ec
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irgraph.h"
42 #include "irgwalk.h"
43 #include "irprog.h"
44 #include "irloop_t.h"
45 #include "iredges_t.h"
46 #include "phiclass.h"
47 #include "irbitset.h"
48 #include "irphase_t.h"
49 #include "irprintf_t.h"
50
51 #include "bemodule.h"
52 #include "bearch_t.h"
53 #include "benode_t.h"
54 #include "beutil.h"
55 #include "beifg_t.h"
56 #include "becopyopt_t.h"
57 #include "becopystat.h"
58 #include "belive_t.h"
59 #include "beinsn_t.h"
60 #include "besched_t.h"
61 #include "benodesets.h"
62 #include "bejavacoal.h"
63 #include "bestatevent.h"
64 #include "beirg_t.h"
65 #include "error.h"
66 #include "bera.h"
67
68 #include <libcore/lc_timing.h>
69 #include <libcore/lc_opts.h>
70 #include <libcore/lc_opts_enum.h>
71
72 #define DUMP_BEFORE 1
73 #define DUMP_AFTER  2
74 #define DUMP_APPEL  4
75 #define DUMP_ALL    2 * DUMP_APPEL - 1
76
77 #define COST_FUNC_FREQ     1
78 #define COST_FUNC_LOOP     2
79 #define COST_FUNC_ALL_ONE  3
80
81 static unsigned   dump_flags  = 0;
82 static unsigned   style_flags = 0;
83 static unsigned   do_stats    = 0;
84 static cost_fct_t cost_func   = co_get_costs_exec_freq;
85 static unsigned   algo        = CO_ALGO_HEUR4;
86 static int        improve     = 1;
87
88 static const lc_opt_enum_mask_items_t dump_items[] = {
89         { "before",  DUMP_BEFORE },
90         { "after",   DUMP_AFTER  },
91         { "appel",   DUMP_APPEL  },
92         { "all",     DUMP_ALL    },
93         { NULL,      0 }
94 };
95
96 static const lc_opt_enum_mask_items_t style_items[] = {
97         { "color",   CO_IFG_DUMP_COLORS },
98         { "labels",  CO_IFG_DUMP_LABELS },
99         { "constr",  CO_IFG_DUMP_CONSTR },
100         { "shape",   CO_IFG_DUMP_SHAPE  },
101         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
102         { NULL,      0 }
103 };
104
105 static const lc_opt_enum_mask_items_t algo_items[] = {
106         { "none",   CO_ALGO_NONE  },
107         { "heur",   CO_ALGO_HEUR  },
108         { "heur2",  CO_ALGO_HEUR2 },
109 #ifdef WITH_JVM
110         { "heur3",  CO_ALGO_HEUR3 },
111 #endif /* WITH_JVM */
112         { "heur4",  CO_ALGO_HEUR4 },
113 #ifdef WITH_ILP
114         { "ilp",    CO_ALGO_ILP   },
115 #endif /* WITH_ILP */
116         { NULL,     0 }
117 };
118
119 typedef int (*opt_funcptr)(void);
120
121 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
122         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
123         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
124         { "one",    (opt_funcptr) co_get_costs_all_one },
125         { NULL,     NULL }
126 };
127
128 static lc_opt_enum_mask_var_t dump_var = {
129         &dump_flags, dump_items
130 };
131
132 static lc_opt_enum_mask_var_t style_var = {
133         &style_flags, style_items
134 };
135
136 static lc_opt_enum_mask_var_t algo_var = {
137         &algo, algo_items
138 };
139
140 static lc_opt_enum_func_ptr_var_t cost_func_var = {
141         (opt_funcptr*) &cost_func, cost_func_items
142 };
143
144 static const lc_opt_table_entry_t options[] = {
145         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
146         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
147         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
148         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
149         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
150         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
151         { NULL }
152 };
153
154 /* Insert additional options registration functions here. */
155 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
156 extern void be_co2_register_options(lc_opt_entry_t *grp);
157 extern void be_co3_register_options(lc_opt_entry_t *grp);
158
159 void be_init_copycoal(void)
160 {
161         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
162         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
163         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
164         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
165
166         lc_opt_add_table(co_grp, options);
167 }
168
169 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
170
171 #undef QUICK_AND_DIRTY_HACK
172
173 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
174 {
175         if (env->ifg)
176                 return be_ifg_connected(env->ifg, a, b);
177         else
178                 return values_interfere(env->birg->lv, a, b);
179 }
180
181
182 /******************************************************************************
183     _____                           _
184    / ____|                         | |
185   | |  __  ___ _ __   ___ _ __ __ _| |
186   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
187   | |__| |  __/ | | |  __/ | | (_| | |
188    \_____|\___|_| |_|\___|_|  \__,_|_|
189
190  ******************************************************************************/
191
192 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
193
194
195 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
196 {
197         const char *s1, *s2, *s3;
198         int len;
199         copy_opt_t *co;
200
201         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
202
203         co = xcalloc(1, sizeof(*co));
204         co->cenv      = chordal_env;
205         co->aenv      = chordal_env->birg->main_env->arch_env;
206         co->irg       = chordal_env->irg;
207         co->cls       = chordal_env->cls;
208         co->get_costs = get_costs;
209
210         s1 = get_irp_prog_name();
211         s2 = get_entity_name(get_irg_entity(co->irg));
212         s3 = chordal_env->cls->name;
213         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
214         co->name = xmalloc(len);
215         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
216
217         return co;
218 }
219
220 void free_copy_opt(copy_opt_t *co) {
221         xfree(co->name);
222         free(co);
223 }
224
225 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
226         const arch_register_req_t *req;
227         const arch_register_t *reg;
228
229         if (arch_irn_is(co->aenv, irn, ignore))
230                 return 0;
231
232         reg = arch_get_irn_register(co->aenv, irn);
233         if (arch_register_type_is(reg, ignore))
234                 return 0;
235
236         req = arch_get_register_req(co->aenv, irn, -1);
237         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
238                 return 1;
239
240         return 0;
241 }
242
243 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
244         const ir_edge_t *edge;
245         const arch_register_t *reg;
246
247         assert(0 && "Is buggy and obsolete. Do not use");
248
249         if (arch_irn_is(co->aenv, irn, ignore))
250                 return 0;
251
252         reg = arch_get_irn_register(co->aenv, irn);
253         if (arch_register_type_is(reg, ignore))
254                 return 0;
255
256         foreach_out_edge(irn, edge) {
257                 ir_node *n = edge->src;
258
259                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
260                         const arch_register_req_t *req;
261                         req = arch_get_register_req(co->aenv, n, -1);
262
263                         if(is_Reg_Phi(n) ||
264                            is_Perm(co->aenv, n) ||
265                            (arch_register_req_is(req, should_be_same))) {
266                                 ir_node *other = get_irn_n(irn, req->other_same);
267                                 if(other == irn)
268                                         return 1;
269                         }
270                 }
271         }
272
273         return 0;
274 }
275
276 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
277         int cost = 0;
278         ir_loop *loop;
279         ir_node *root_block = get_nodes_block(root);
280
281         if (is_Phi(root)) {
282                 /* for phis the copies are placed in the corresponding pred-block */
283                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
284         } else {
285                 /* a perm places the copy in the same block as it resides */
286                 loop = get_irn_loop(root_block);
287         }
288         if (loop) {
289                 int d = get_loop_depth(loop);
290                 cost = d*d;
291         }
292         return 1+cost;
293 }
294
295 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
296         int res;
297         ir_node *root_bl = get_nodes_block(root);
298         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
299         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
300
301         /* don't allow values smaller than one. */
302         return res < 1 ? 1 : res;
303 }
304
305
306 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
307         return 1;
308 }
309
310 /******************************************************************************
311    ____        _   _    _       _ _          _____ _
312   / __ \      | | | |  | |     (_) |        / ____| |
313  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
314  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
315  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
316   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
317         | |                                                            __/ |
318         |_|                                                           |___/
319  ******************************************************************************/
320
321 /**
322  * Determines a maximum weighted independent set with respect to
323  * the interference and conflict edges of all nodes in a qnode.
324  */
325 static int ou_max_ind_set_costs(unit_t *ou) {
326         be_chordal_env_t *chordal_env = ou->co->cenv;
327         ir_node **safe, **unsafe;
328         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
329         bitset_t *curr;
330         int max, pos, curr_weight, best_weight = 0;
331
332         /* assign the nodes into two groups.
333          * safe: node has no interference, hence it is in every max stable set.
334          * unsafe: node has an interference
335          */
336         safe = alloca((ou->node_count-1) * sizeof(*safe));
337         safe_costs = 0;
338         safe_count = 0;
339         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
340         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
341         unsafe_count = 0;
342         for(i=1; i<ou->node_count; ++i) {
343                 int is_safe = 1;
344                 for(o=1; o<ou->node_count; ++o) {
345                         if (i==o)
346                                 continue;
347                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
348                                 unsafe_costs[unsafe_count] = ou->costs[i];
349                                 unsafe[unsafe_count] = ou->nodes[i];
350                                 ++unsafe_count;
351                                 is_safe = 0;
352                                 break;
353                         }
354                 }
355                 if (is_safe) {
356                         safe_costs += ou->costs[i];
357                         safe[safe_count++] = ou->nodes[i];
358                 }
359         }
360
361
362         /* now compute the best set out of the unsafe nodes*/
363         if (unsafe_count > MIS_HEUR_TRIGGER) {
364                 bitset_t *best = bitset_alloca(unsafe_count);
365                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
366                 for (i=0; i<unsafe_count; ++i) {
367                         bitset_set(best, i);
368                         /* check if it is a stable set */
369                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
370                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
371                                         bitset_clear(best, i); /* clear the bit and try next one */
372                                         break;
373                                 }
374                 }
375                 /* compute the weight */
376                 bitset_foreach(best, pos)
377                         best_weight += unsafe_costs[pos];
378         } else {
379                 /* Exact Algorithm: Brute force */
380                 curr = bitset_alloca(unsafe_count);
381                 bitset_set_all(curr);
382                 while ((max = bitset_popcnt(curr)) != 0) {
383                         /* check if curr is a stable set */
384                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
385                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
386                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
387                                                         goto no_stable_set;
388
389                         /* if we arrive here, we have a stable set */
390                         /* compute the weigth of the stable set*/
391                         curr_weight = 0;
392                         bitset_foreach(curr, pos)
393                                 curr_weight += unsafe_costs[pos];
394
395                         /* any better ? */
396                         if (curr_weight > best_weight) {
397                                 best_weight = curr_weight;
398                         }
399
400         no_stable_set:
401                         bitset_minus1(curr);
402                 }
403         }
404
405         return safe_costs+best_weight;
406 }
407
408 static void co_collect_units(ir_node *irn, void *env) {
409         copy_opt_t *co = env;
410         unit_t *unit;
411
412         if (!is_curr_reg_class(co, irn))
413                 return;
414         if (!co_is_optimizable_root(co, irn))
415                 return;
416
417         /* Init a new unit */
418         unit = xcalloc(1, sizeof(*unit));
419         unit->co = co;
420         unit->node_count = 1;
421         INIT_LIST_HEAD(&unit->queue);
422
423         /* Phi with some/all of its arguments */
424         if (is_Reg_Phi(irn)) {
425                 int i, arity;
426
427                 /* init */
428                 arity = get_irn_arity(irn);
429                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
430                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
431                 unit->nodes[0] = irn;
432
433                 /* fill */
434                 for (i=0; i<arity; ++i) {
435                         int o, arg_pos;
436                         ir_node *arg = get_irn_n(irn, i);
437
438                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
439                         if (arg == irn)
440                                 continue;
441                         if (nodes_interfere(co->cenv, irn, arg)) {
442                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
443                                 continue;
444                         }
445
446                         /* Else insert the argument of the phi to the members of this ou */
447                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
448
449                         if (! arch_irn_is(co->aenv, arg, ignore)) {
450                                 /* Check if arg has occurred at a prior position in the arg/list */
451                                 arg_pos = 0;
452                                 for (o=1; o<unit->node_count; ++o) {
453                                         if (unit->nodes[o] == arg) {
454                                                 arg_pos = o;
455                                                 break;
456                                         }
457                                 }
458
459                                 if (!arg_pos) { /* a new argument */
460                                         /* insert node, set costs */
461                                         unit->nodes[unit->node_count] = arg;
462                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
463                                         unit->node_count++;
464                                 } else { /* arg has occured before in same phi */
465                                         /* increase costs for existing arg */
466                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
467                                 }
468                         }
469                 }
470                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
471                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
472         } else
473
474         /* Proj of a perm with corresponding arg */
475         if (is_Perm_Proj(co->aenv, irn)) {
476                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
477                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
478                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
479                 unit->node_count = 2;
480                 unit->nodes[0] = irn;
481                 unit->nodes[1] = get_Perm_src(irn);
482                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
483         } else {
484                 const arch_register_req_t *req =
485                         arch_get_register_req(co->aenv, irn, -1);
486
487                 /* Src == Tgt of a 2-addr-code instruction */
488                 if (is_2addr_code(req)) {
489                         ir_node *other = get_irn_n(irn, req->other_same);
490                         if (!arch_irn_is(co->aenv, other, ignore) &&
491                                         !nodes_interfere(co->cenv, irn, other)) {
492                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
493                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
494                                 unit->node_count = 2;
495                                 unit->nodes[0] = irn;
496                                 unit->nodes[1] = other;
497                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
498                         }
499                 } else {
500                         assert(0 && "This is not an optimizable node!");
501                 }
502         }
503
504         /* Insert the new unit at a position according to its costs */
505         if (unit->node_count > 1) {
506                 int i;
507                 struct list_head *tmp;
508
509                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
510                 for(i=1; i<unit->node_count; ++i) {
511                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
512                         unit->all_nodes_costs += unit->costs[i];
513                 }
514
515                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
516                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
517                 /* Insert the new ou according to its sort_key */
518                 tmp = &co->units;
519                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
520                         tmp = tmp->next;
521                 list_add(&unit->units, tmp);
522         } else {
523                 free(unit);
524         }
525 }
526
527 #ifdef QUICK_AND_DIRTY_HACK
528
529 static int compare_ous(const void *k1, const void *k2) {
530         const unit_t *u1 = *((const unit_t **) k1);
531         const unit_t *u2 = *((const unit_t **) k2);
532         int i, o, u1_has_constr, u2_has_constr;
533         arch_register_req_t req;
534         const arch_env_t *aenv = u1->co->aenv;
535
536         /* Units with constraints come first */
537         u1_has_constr = 0;
538         for (i=0; i<u1->node_count; ++i) {
539                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
540                 if (arch_register_req_is(&req, limited)) {
541                         u1_has_constr = 1;
542                         break;
543                 }
544         }
545
546         u2_has_constr = 0;
547         for (i=0; i<u2->node_count; ++i) {
548                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
549                 if (arch_register_req_is(&req, limited)) {
550                         u2_has_constr = 1;
551                         break;
552                 }
553         }
554
555         if (u1_has_constr != u2_has_constr)
556                 return u2_has_constr - u1_has_constr;
557
558         /* Now check, whether the two units are connected */
559 #if 0
560         for (i=0; i<u1->node_count; ++i)
561                 for (o=0; o<u2->node_count; ++o)
562                         if (u1->nodes[i] == u2->nodes[o])
563                                 return 0;
564 #endif
565
566         /* After all, the sort key decides. Greater keys come first. */
567         return u2->sort_key - u1->sort_key;
568
569 }
570
571 /**
572  * Sort the ou's according to constraints and their sort_key
573  */
574 static void co_sort_units(copy_opt_t *co) {
575         int i, count = 0, costs;
576         unit_t *ou, **ous;
577
578         /* get the number of ous, remove them form the list and fill the array */
579         list_for_each_entry(unit_t, ou, &co->units, units)
580                 count++;
581         ous = alloca(count * sizeof(*ous));
582
583         costs = co_get_max_copy_costs(co);
584
585         i = 0;
586         list_for_each_entry(unit_t, ou, &co->units, units)
587                 ous[i++] = ou;
588
589         INIT_LIST_HEAD(&co->units);
590
591         assert(count == i && list_empty(&co->units));
592
593         for (i=0; i<count; ++i)
594                 ir_printf("%+F\n", ous[i]->nodes[0]);
595
596         qsort(ous, count, sizeof(*ous), compare_ous);
597
598         ir_printf("\n\n");
599         for (i=0; i<count; ++i)
600                 ir_printf("%+F\n", ous[i]->nodes[0]);
601
602         /* reinsert into list in correct order */
603         for (i=0; i<count; ++i)
604                 list_add_tail(&ous[i]->units, &co->units);
605
606         assert(costs == co_get_max_copy_costs(co));
607 }
608 #endif
609
610 void co_build_ou_structure(copy_opt_t *co) {
611         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
612         INIT_LIST_HEAD(&co->units);
613         irg_walk_graph(co->irg, co_collect_units, NULL, co);
614 #ifdef QUICK_AND_DIRTY_HACK
615         co_sort_units(co);
616 #endif
617 }
618
619 void co_free_ou_structure(copy_opt_t *co) {
620         unit_t *curr, *tmp;
621         ASSERT_OU_AVAIL(co);
622         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
623                 xfree(curr->nodes);
624                 xfree(curr->costs);
625                 xfree(curr);
626         }
627         co->units.next = NULL;
628 }
629
630 /* co_solve_heuristic() is implemented in becopyheur.c */
631
632 int co_get_max_copy_costs(const copy_opt_t *co) {
633         int i, res = 0;
634         unit_t *curr;
635
636         ASSERT_OU_AVAIL(co);
637
638         list_for_each_entry(unit_t, curr, &co->units, units) {
639                 res += curr->inevitable_costs;
640                 for (i=1; i<curr->node_count; ++i)
641                         res += curr->costs[i];
642         }
643         return res;
644 }
645
646 int co_get_inevit_copy_costs(const copy_opt_t *co) {
647         int res = 0;
648         unit_t *curr;
649
650         ASSERT_OU_AVAIL(co);
651
652         list_for_each_entry(unit_t, curr, &co->units, units)
653                 res += curr->inevitable_costs;
654         return res;
655 }
656
657 int co_get_copy_costs(const copy_opt_t *co) {
658         int i, res = 0;
659         unit_t *curr;
660
661         ASSERT_OU_AVAIL(co);
662
663         list_for_each_entry(unit_t, curr, &co->units, units) {
664                 int root_col = get_irn_col(co, curr->nodes[0]);
665                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
666                 res += curr->inevitable_costs;
667                 for (i=1; i<curr->node_count; ++i) {
668                         int arg_col = get_irn_col(co, curr->nodes[i]);
669                         if (root_col != arg_col) {
670                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
671                                 res += curr->costs[i];
672                         }
673                 }
674         }
675         return res;
676 }
677
678 int co_get_lower_bound(const copy_opt_t *co) {
679         int res = 0;
680         unit_t *curr;
681
682         ASSERT_OU_AVAIL(co);
683
684         list_for_each_entry(unit_t, curr, &co->units, units)
685                 res += curr->inevitable_costs + curr->min_nodes_costs;
686         return res;
687 }
688
689 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
690 {
691         bitset_t *seen = bitset_irg_malloc(co->irg);
692         affinity_node_t *an;
693
694         memset(stat, 0, sizeof(stat[0]));
695
696         /* count affinity edges. */
697         co_gs_foreach_aff_node(co, an) {
698                 neighb_t *neigh;
699                 stat->aff_nodes += 1;
700                 bitset_add_irn(seen, an->irn);
701                 co_gs_foreach_neighb(an, neigh) {
702                         if(!bitset_contains_irn(seen, neigh->irn)) {
703                                 stat->aff_edges += 1;
704                                 stat->max_costs += neigh->costs;
705
706                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
707                                         stat->costs += neigh->costs;
708                                         stat->unsatisfied_edges += 1;
709                                 }
710
711                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
712                                         stat->aff_int += 1;
713                                         stat->inevit_costs += neigh->costs;
714                                 }
715
716                         }
717                 }
718         }
719
720         bitset_free(seen);
721 }
722
723 /******************************************************************************
724    _____                 _        _____ _
725   / ____|               | |      / ____| |
726  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
727  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
728  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
729   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
730                   | |                                       __/ |
731                   |_|                                      |___/
732  ******************************************************************************/
733
734 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
735         const affinity_node_t *n1 = k1;
736         const affinity_node_t *n2 = k2;
737
738         return (n1->irn != n2->irn);
739 }
740
741 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
742         affinity_node_t new_node, *node;
743         neighb_t new_nbr, *nbr;
744         int allocnew;
745
746         new_node.irn        = n1;
747         new_node.degree     = 0;
748         new_node.neighbours = NULL;
749         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
750
751         allocnew = 1;
752         for (nbr = node->neighbours; nbr; nbr = nbr->next)
753                 if (nbr->irn == n2) {
754                         allocnew = 0;
755                         break;
756                 }
757
758         /* if we did not find n2 in n1's neighbourhood insert it */
759         if (allocnew) {
760                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
761                 nbr = obstack_finish(&co->obst);
762                 nbr->irn   = n2;
763                 nbr->costs = 0;
764                 nbr->next  = node->neighbours;
765                 node->neighbours = nbr;
766                 node->degree++;
767         }
768
769         /* now nbr points to n1's neighbour-entry of n2 */
770         nbr->costs += costs;
771 }
772
773 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
774         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
775                 add_edge(co, n1, n2, costs);
776                 add_edge(co, n2, n1, costs);
777         }
778 }
779
780 static void build_graph_walker(ir_node *irn, void *env) {
781         copy_opt_t *co = env;
782         int pos, max;
783         const arch_register_t *reg;
784
785         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
786                 return;
787
788         reg = arch_get_irn_register(co->aenv, irn);
789         if (arch_register_type_is(reg, ignore))
790                 return;
791
792         /* Phis */
793         if (is_Reg_Phi(irn))
794                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
795                         ir_node *arg = get_irn_n(irn, pos);
796                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
797                 }
798
799         /* Perms */
800         else if (is_Perm_Proj(co->aenv, irn)) {
801                 ir_node *arg = get_Perm_src(irn);
802                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
803         }
804
805         /* 2-address code */
806         else {
807                 const arch_register_req_t *req =
808                         arch_get_register_req(co->aenv, irn, -1);
809                 if (is_2addr_code(req)) {
810                         ir_node *other = get_irn_n(irn, req->other_same);
811                         if(!arch_irn_is(co->aenv, other, ignore))
812                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
813                 }
814         }
815 }
816
817 void co_build_graph_structure(copy_opt_t *co) {
818         obstack_init(&co->obst);
819         co->nodes = new_set(compare_affinity_node_t, 32);
820
821         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
822 }
823
824 void co_free_graph_structure(copy_opt_t *co) {
825         ASSERT_GS_AVAIL(co);
826
827         del_set(co->nodes);
828         obstack_free(&co->obst, NULL);
829         co->nodes = NULL;
830 }
831
832 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
833
834 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
835         affinity_node_t new_node, *n;
836
837         ASSERT_GS_AVAIL(co);
838
839         new_node.irn = irn;
840         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
841         if (n) {
842                 return (n->degree > 0);
843         } else
844                 return 0;
845 }
846
847 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
848 {
849         be_ifg_t *ifg  = co->cenv->ifg;
850         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
851
852         ir_node *irn;
853         void *it, *nit;
854         int i, n, n_regs;
855
856         n_regs = 0;
857         for(i = 0; i < co->cls->n_regs; ++i) {
858                 const arch_register_t *reg = &co->cls->regs[i];
859                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
860         }
861
862         /*
863          * n contains the first node number.
864          * the values below n are the pre-colored register nodes
865          */
866
867         it  = be_ifg_nodes_iter_alloca(ifg);
868         nit = be_ifg_neighbours_iter_alloca(ifg);
869
870         n = n_regs;
871         be_ifg_foreach_node(ifg, it, irn) {
872                 if(!arch_irn_is(co->aenv, irn, ignore))
873                         set_irn_link(irn, INT_TO_PTR(n++));
874         }
875
876         fprintf(f, "%d %d\n", n, n_regs);
877
878         be_ifg_foreach_node(ifg, it, irn) {
879                 if(!arch_irn_is(co->aenv, irn, ignore)) {
880                         int idx            = PTR_TO_INT(get_irn_link(irn));
881                         affinity_node_t *a = get_affinity_info(co, irn);
882
883                         const arch_register_req_t *req;
884                         ir_node *adj;
885
886                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
887                         if(arch_register_req_is(req, limited)) {
888                                 for(i = 0; i < co->cls->n_regs; ++i) {
889                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
890                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
891                                 }
892                         }
893
894
895                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
896                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
897                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
898                                         if(idx < adj_idx)
899                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
900                                 }
901                         }
902
903                         if(a) {
904                                 neighb_t *n;
905
906                                 co_gs_foreach_neighb(a, n) {
907                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
908                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
909                                                 if(idx < n_idx)
910                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
911                                         }
912                                 }
913                         }
914                 }
915         }
916 }
917
918 typedef struct _appel_clique_walker_t {
919         ir_phase ph;
920         const copy_opt_t *co;
921         int curr_nr;
922         int node_count;
923         FILE *f;
924         int dumb;
925         int *color_map;
926         struct obstack obst;
927 } appel_clique_walker_t;
928
929 typedef struct _appel_block_info_t {
930         int *live_end_nr;
931         int *live_in_nr;
932         int *phi_nr;
933         ir_node **live_end;
934         ir_node **live_in;
935         ir_node **phi;
936         int n_live_end;
937         int n_live_in;
938         int n_phi;
939 } appel_block_info_t;
940
941 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
942 {
943 #if 0
944         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
945         int res = (int) freq;
946         return res == 0 ? 1 : res;
947 #else
948         ir_loop *loop = get_irn_loop(bl);
949         if(loop) {
950                 int d = get_loop_depth(loop);
951                 return 1 + d * d;
952         }
953         return 1;
954 #endif
955 }
956
957 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
958 {
959         appel_block_info_t *res = NULL;
960
961         if(is_Block(irn)) {
962                 appel_clique_walker_t *d = (void *) phase;
963                 res = phase_alloc(phase, sizeof(res[0]));
964                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
965                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
966                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
967                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
968                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
969                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
970         }
971
972         return res;
973 }
974
975 typedef struct _insn_list_t {
976         be_insn_t *insn;
977         struct list_head list;
978 } insn_list_t;
979
980 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
981 {
982         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
983         int i;
984
985         for(i = 0; i < bli->n_live_end; ++i)
986                 if(bli->live_end[i] == irn)
987                         return bli->live_end_nr[i];
988
989         return -1;
990 }
991
992 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
993 {
994         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
995         ir_node *irn;
996         int n_live;
997         int j;
998
999         n_live = 0;
1000         foreach_pset(live, irn)
1001                 live_arr[n_live++] = irn;
1002
1003         /* dump the live after clique */
1004         if(!env->dumb) {
1005                 for(j = 0; j < n_live; ++j) {
1006                         int k;
1007
1008                         for(k = j + 1; k < n_live; ++k) {
1009                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
1010                         }
1011                         fprintf(env->f, "\n");
1012                 }
1013         }
1014
1015         /* dump the affinities */
1016         for(j = 0; !env->dumb && j < n_live; ++j) {
1017                 ir_node *irn = live_arr[j];
1018                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1019
1020                 /* if the node was already live in the last insn dump the affinity */
1021                 if(old_nr > start_nr) {
1022                         int weight = appel_aff_weight(env, bl);
1023                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1024                 }
1025         }
1026
1027         /* set the current numbers into the link field. */
1028         for(j = 0; j < n_live; ++j) {
1029                 ir_node *irn = live_arr[j];
1030                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1031         }
1032
1033         return curr_nr + n_live;
1034 }
1035
1036 static void appel_walker(ir_node *bl, void *data)
1037 {
1038         appel_clique_walker_t *env = data;
1039         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1040         struct obstack *obst       = &env->obst;
1041         void *base                 = obstack_base(obst);
1042         pset *live                 = pset_new_ptr_default();
1043         be_lv_t *lv                = env->co->cenv->birg->lv;
1044
1045         int n_insns  = 0;
1046         int n_nodes  = 0;
1047         int start_nr = env->curr_nr;
1048         int curr_nr  = start_nr;
1049
1050         be_insn_env_t insn_env;
1051         int i, j;
1052         ir_node *irn;
1053         be_insn_t **insns;
1054
1055         insn_env.aenv = env->co->aenv;
1056         insn_env.cls  = env->co->cls;
1057         insn_env.obst = obst;
1058         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1059
1060         /* Guess how many insns will be in this block. */
1061         sched_foreach(bl, irn)
1062                 n_nodes++;
1063
1064         bli->n_phi = 0;
1065         insns = malloc(n_nodes * sizeof(insns[0]));
1066
1067         /* Put all insns in an array. */
1068         irn = sched_first(bl);
1069         while(!sched_is_end(irn)) {
1070                 be_insn_t *insn;
1071                 insn = be_scan_insn(&insn_env, irn);
1072                 insns[n_insns++] = insn;
1073                 irn = insn->next_insn;
1074         }
1075
1076         DBG((dbg, LEVEL_2, "%+F\n", bl));
1077         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1078
1079         /* Generate the bad and ugly. */
1080         for(i = n_insns - 1; i >= 0; --i) {
1081                 be_insn_t *insn = insns[i];
1082
1083                 /* The first live set has to be saved in the block border set. */
1084                 if(i == n_insns - 1) {
1085                         j = 0;
1086                         foreach_pset(live, irn) {
1087                                 bli->live_end[j]    = irn;
1088                                 bli->live_end_nr[j] = curr_nr + j;
1089                                 ++j;
1090                         }
1091                         bli->n_live_end = j;
1092                 }
1093
1094                 if(!env->dumb) {
1095                         for(j = 0; j < insn->use_start; ++j) {
1096                                 ir_node *op   = insn->ops[j].carrier;
1097                                 bitset_t *adm = insn->ops[j].regs;
1098                                 int k;
1099                                 int nr;
1100
1101                                 if(!insn->ops[j].has_constraints)
1102                                         continue;
1103
1104                                 nr = 0;
1105                                 foreach_pset(live, irn) {
1106                                         if(irn == op) {
1107                                                 pset_break(live);
1108                                                 break;
1109                                         }
1110                                         ++nr;
1111                                 }
1112
1113                                 assert(nr < pset_count(live));
1114
1115                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1116                                         int mapped_col = env->color_map[k];
1117                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1118                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1119                                 }
1120                         }
1121                 }
1122
1123                 /* dump the clique and update the stuff. */
1124                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1125
1126                 /* remove all defs. */
1127                 for(j = 0; j < insn->use_start; ++j)
1128                         pset_remove_ptr(live, insn->ops[j].carrier);
1129
1130                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1131                         bli->phi[bli->n_phi]    = insn->irn;
1132                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1133                         bli->n_phi++;
1134                 }
1135
1136                 /* add all uses */
1137                 else
1138                         for(j = insn->use_start; j < insn->n_ops; ++j)
1139                                 pset_insert_ptr(live, insn->ops[j].carrier);
1140         }
1141
1142         /* print the start clique. */
1143         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1144
1145         i = 0;
1146         foreach_pset(live, irn) {
1147                 bli->live_in[i]    = irn;
1148                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1149                 ++i;
1150         }
1151         bli->n_live_in = i;
1152
1153         del_pset(live);
1154         free(insns);
1155         obstack_free(obst, base);
1156         env->curr_nr = curr_nr;
1157 }
1158
1159 static void appel_inter_block_aff(ir_node *bl, void *data)
1160 {
1161         appel_clique_walker_t *env = data;
1162         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1163
1164         int i, j, n;
1165
1166         for(i = 0; i < bli->n_live_in; ++i) {
1167                 ir_node *irn = bli->live_in[i];
1168
1169                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1170                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1171
1172                         int nr = appel_get_live_end_nr(env, pred, irn);
1173                         assert(nr >= 0);
1174                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1175                 }
1176         }
1177
1178         for(i = 0; i < bli->n_phi; ++i) {
1179                 ir_node *irn = bli->phi[i];
1180
1181                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1182                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1183                         ir_node *op = get_irn_n(irn, j);
1184
1185                         int nr = appel_get_live_end_nr(env, pred, op);
1186                         assert(nr >= 0);
1187                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1188                 }
1189         }
1190
1191 }
1192
1193 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1194 {
1195         int i;
1196         int n_colors;
1197         appel_clique_walker_t env;
1198         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1199         be_lv_t *lv = co->cenv->birg->lv;
1200
1201         be_liveness_recompute(lv);
1202         obstack_init(&env.obst);
1203         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1204         env.curr_nr = co->cls->n_regs;
1205         env.co = co;
1206         env.f = f;
1207
1208         bitset_copy(adm, co->cenv->ignore_colors);
1209         bitset_flip_all(adm);
1210
1211         /* Make color map. */
1212         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1213         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1214                 const arch_register_t *reg = &co->cls->regs[i];
1215                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1216         }
1217
1218         env.dumb = 1;
1219         env.curr_nr = n_colors;
1220         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1221         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1222
1223         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1224
1225         /* make the first k nodes interfere */
1226         for(i = 0; i < n_colors; ++i) {
1227                 int j;
1228                 for(j = i + 1; j < n_colors; ++j)
1229                         fprintf(f, "%d %d -1 ", i, j);
1230                 fprintf(f, "\n");
1231         }
1232
1233         env.dumb = 0;
1234         env.curr_nr = n_colors;
1235         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1236         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1237         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1238         obstack_free(&env.obst, NULL);
1239 }
1240
1241 /*
1242          ___ _____ ____   ____   ___ _____   ____                        _
1243         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1244          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1245          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1246         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1247                                                                   |_|            |___/
1248 */
1249
1250 static const char *get_dot_color_name(int col)
1251 {
1252         static const char *names[] = {
1253                 "blue",
1254                 "red",
1255                 "green",
1256                 "yellow",
1257                 "cyan",
1258                 "magenta",
1259                 "orange",
1260                 "chocolate",
1261                 "beige",
1262                 "navy",
1263                 "darkgreen",
1264                 "darkred",
1265                 "lightPink",
1266                 "chartreuse",
1267                 "lightskyblue",
1268                 "linen",
1269                 "pink",
1270                 "lightslateblue",
1271                 "mintcream",
1272                 "red",
1273                 "darkolivegreen",
1274                 "mediumblue",
1275                 "mistyrose",
1276                 "salmon",
1277                 "darkseagreen",
1278                 "mediumslateblue"
1279                 "moccasin",
1280                 "tomato",
1281                 "forestgreen",
1282                 "darkturquoise",
1283                 "palevioletred"
1284         };
1285
1286         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1287 }
1288
1289 typedef struct _co_ifg_dump_t {
1290         const copy_opt_t *co;
1291         unsigned flags;
1292 } co_ifg_dump_t;
1293
1294 static void ifg_dump_graph_attr(FILE *f, void *self)
1295 {
1296         fprintf(f, "overlap=scale");
1297 }
1298
1299 static int ifg_is_dump_node(void *self, ir_node *irn)
1300 {
1301         co_ifg_dump_t *cod = self;
1302         return !arch_irn_is(cod->co->aenv, irn, ignore);
1303 }
1304
1305 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1306 {
1307         co_ifg_dump_t *env         = self;
1308         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1309         const arch_register_req_t *req;
1310         int limited;
1311
1312         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1313         limited = arch_register_req_is(req, limited);
1314
1315         if(env->flags & CO_IFG_DUMP_LABELS) {
1316                 ir_fprintf(f, "label=\"%+F", irn);
1317
1318 #if 0
1319                 // TODO fix this...
1320                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1321                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1322                         req.limited(req.limited_env, bs);
1323                         ir_fprintf(f, "\\n%B", bs);
1324                 }
1325 #endif
1326                 ir_fprintf(f, "\" ");
1327         } else {
1328                 fprintf(f, "label=\"\" shape=point " );
1329         }
1330
1331         if(env->flags & CO_IFG_DUMP_SHAPE)
1332                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1333
1334         if(env->flags & CO_IFG_DUMP_COLORS)
1335                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1336 }
1337
1338 static void ifg_dump_at_end(FILE *file, void *self)
1339 {
1340         co_ifg_dump_t *env = self;
1341         affinity_node_t *a;
1342
1343         co_gs_foreach_aff_node(env->co, a) {
1344                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1345                 unsigned aidx = get_irn_idx(a->irn);
1346                 neighb_t *n;
1347
1348                 co_gs_foreach_neighb(a, n) {
1349                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1350                         unsigned nidx = get_irn_idx(n->irn);
1351
1352                         if(aidx < nidx) {
1353                                 const char *color = nr == ar ? "blue" : "red";
1354                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1355                                 if(env->flags & CO_IFG_DUMP_LABELS)
1356                                         fprintf(file, "label=\"%d\" ", n->costs);
1357                                 if(env->flags & CO_IFG_DUMP_COLORS)
1358                                         fprintf(file, "color=%s ", color);
1359                                 else
1360                                         fprintf(file, "style=dotted");
1361                                 fprintf(file, "];\n");
1362                         }
1363                 }
1364         }
1365 }
1366
1367
1368 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1369         ifg_is_dump_node,
1370         ifg_dump_graph_attr,
1371         ifg_dump_node_attr,
1372         NULL,
1373         NULL,
1374         ifg_dump_at_end
1375 };
1376
1377
1378
1379 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1380 {
1381         co_ifg_dump_t cod;
1382
1383         cod.co    = co;
1384         cod.flags = flags;
1385         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1386 }
1387
1388
1389 void co_solve_park_moon(copy_opt_t *opt)
1390 {
1391
1392 }
1393
1394 static int void_algo(copy_opt_t *co)
1395 {
1396         return 0;
1397 }
1398
1399 /*
1400                 _    _                  _ _   _
1401            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1402           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1403          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1404         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1405                            |___/
1406 */
1407
1408 typedef struct {
1409         co_algo_t  *algo;
1410         const char *name;
1411         int        can_improve_existing;
1412 } co_algo_info_t;
1413
1414 static co_algo_info_t algos[] = {
1415         { void_algo,               "none",  0 },
1416         { co_solve_heuristic,      "heur1", 0 },
1417         { co_solve_heuristic_new,  "heur2", 0 },
1418         { co_solve_heuristic_java, "heur3", 0 },
1419         { co_solve_heuristic_mst,  "heur4", 0 },
1420 #ifdef WITH_ILP
1421         { co_solve_ilp2,           "ilp",   1 },
1422 #endif
1423         { NULL,                    "",      0 }
1424 };
1425
1426 /*
1427     __  __       _         ____       _
1428    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1429    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1430    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1431    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1432
1433 */
1434
1435 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1436 {
1437         FILE *result;
1438         char buf[1024];
1439
1440         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1441         result = fopen(buf, "wt");
1442         if(result == NULL) {
1443                 panic("Couldn't open '%s' for writing.", buf);
1444         }
1445
1446         return result;
1447 }
1448
1449 void co_driver(be_chordal_env_t *cenv)
1450 {
1451         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1452         co_complete_stats_t before, after;
1453         copy_opt_t          *co;
1454         co_algo_t           *algo_func;
1455         int                 was_optimal = 0;
1456
1457         if (algo < 0 || algo >= CO_ALGO_LAST)
1458                 return;
1459
1460         co = new_copy_opt(cenv, cost_func);
1461         co_build_ou_structure(co);
1462         co_build_graph_structure(co);
1463
1464         co_complete_stats(co, &before);
1465
1466         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1467         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1468         be_stat_ev_ull("co_max_costs",    before.max_costs);
1469         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1470         be_stat_ev_ull("co_aff_int",      before.aff_int);
1471
1472         be_stat_ev_ull("co_init_costs",   before.costs);
1473         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1474
1475         /* Dump the interference graph in Appel's format. */
1476         if (dump_flags & DUMP_APPEL) {
1477                 FILE *f = my_open(cenv, "", ".apl");
1478                 co_dump_appel_graph(co, f);
1479                 fclose(f);
1480         }
1481
1482         if (dump_flags & DUMP_BEFORE) {
1483                 FILE *f = my_open(cenv, "", "-before.dot");
1484                 co_dump_ifg_dot(co, f, style_flags);
1485                 fclose(f);
1486         }
1487
1488         /* if the algo can improve results, provide an initial solution with heur3 */
1489         if (improve && algos[algo].can_improve_existing) {
1490                 co_complete_stats_t stats;
1491
1492                 /* produce a heuristic solution */
1493 #ifdef WITH_JVM
1494                 co_solve_heuristic_java(co);
1495 #else
1496                 co_solve_heuristic(co);
1497 #endif /* WITH_JVM */
1498
1499                 /* do the stats and provide the current costs */
1500                 co_complete_stats(co, &stats);
1501                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1502         }
1503
1504 #ifdef WITH_JVM
1505         /* start the JVM here so that it does not tamper the timing. */
1506         if (algo == CO_ALGO_HEUR3)
1507                 be_java_coal_start_jvm();
1508 #endif /* WITH_JVM */
1509
1510         algo_func = algos[algo].algo;
1511
1512         /* perform actual copy minimization */
1513         lc_timer_reset_and_start(timer);
1514         was_optimal = algo_func(co);
1515         lc_timer_stop(timer);
1516
1517         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1518         be_stat_ev_ull("co_optimal", was_optimal);
1519
1520         if (dump_flags & DUMP_AFTER) {
1521                 FILE *f = my_open(cenv, "", "-after.dot");
1522                 co_dump_ifg_dot(co, f, style_flags);
1523                 fclose(f);
1524         }
1525
1526         co_complete_stats(co, &after);
1527
1528         if (do_stats) {
1529                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1530                 ulong64 evitable          = after.costs     - after.inevit_costs;
1531
1532                 ir_printf("%30F ", cenv->irg);
1533                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1534
1535                 if(optimizable_costs > 0)
1536                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1537                 else
1538                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1539         }
1540
1541         be_stat_ev_ull("co_after_costs", after.costs);
1542         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1543
1544         co_free_graph_structure(co);
1545         co_free_ou_structure(co);
1546         free_copy_opt(co);
1547 }