fixed handling of other_same/other_different constraint handling
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110 #ifdef WITH_JVM
111         { "heur3",  CO_ALGO_HEUR3 },
112 #endif /* WITH_JVM */
113         { "heur4",  CO_ALGO_HEUR4 },
114 #ifdef WITH_ILP
115         { "ilp",    CO_ALGO_ILP   },
116 #endif /* WITH_ILP */
117         { NULL,     0 }
118 };
119
120 typedef int (*opt_funcptr)(void);
121
122 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
123         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
124         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
125         { "one",    (opt_funcptr) co_get_costs_all_one },
126         { NULL,     NULL }
127 };
128
129 static lc_opt_enum_mask_var_t dump_var = {
130         &dump_flags, dump_items
131 };
132
133 static lc_opt_enum_mask_var_t style_var = {
134         &style_flags, style_items
135 };
136
137 static lc_opt_enum_mask_var_t algo_var = {
138         &algo, algo_items
139 };
140
141 static lc_opt_enum_func_ptr_var_t cost_func_var = {
142         (opt_funcptr*) &cost_func, cost_func_items
143 };
144
145 static const lc_opt_table_entry_t options[] = {
146         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
147         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
148         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
149         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
150         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
151         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
152         { NULL }
153 };
154
155 /* Insert additional options registration functions here. */
156 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
157 extern void be_co2_register_options(lc_opt_entry_t *grp);
158 extern void be_co3_register_options(lc_opt_entry_t *grp);
159
160 void be_init_copycoal(void)
161 {
162         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
163         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
164         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
165         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
166
167         lc_opt_add_table(co_grp, options);
168 }
169
170 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
171
172 #undef QUICK_AND_DIRTY_HACK
173
174 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
175 {
176         if (env->ifg)
177                 return be_ifg_connected(env->ifg, a, b);
178         else
179                 return values_interfere(env->birg, a, b);
180 }
181
182
183 /******************************************************************************
184     _____                           _
185    / ____|                         | |
186   | |  __  ___ _ __   ___ _ __ __ _| |
187   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
188   | |__| |  __/ | | |  __/ | | (_| | |
189    \_____|\___|_| |_|\___|_|  \__,_|_|
190
191  ******************************************************************************/
192
193 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
194
195
196 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
197 {
198         const char *s1, *s2, *s3;
199         int len;
200         copy_opt_t *co;
201
202         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
203
204         co = xcalloc(1, sizeof(*co));
205         co->cenv      = chordal_env;
206         co->aenv      = chordal_env->birg->main_env->arch_env;
207         co->irg       = chordal_env->irg;
208         co->cls       = chordal_env->cls;
209         co->get_costs = get_costs;
210
211         s1 = get_irp_prog_name();
212         s2 = get_entity_name(get_irg_entity(co->irg));
213         s3 = chordal_env->cls->name;
214         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
215         co->name = xmalloc(len);
216         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
217
218         return co;
219 }
220
221 void free_copy_opt(copy_opt_t *co) {
222         xfree(co->name);
223         free(co);
224 }
225
226 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
227         const arch_register_req_t *req;
228         const arch_register_t *reg;
229
230         if (arch_irn_is(co->aenv, irn, ignore))
231                 return 0;
232
233         reg = arch_get_irn_register(co->aenv, irn);
234         if (arch_register_type_is(reg, ignore))
235                 return 0;
236
237         req = arch_get_register_req(co->aenv, irn, -1);
238         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
245         int cost = 0;
246         ir_loop *loop;
247         ir_node *root_block = get_nodes_block(root);
248
249         if (is_Phi(root)) {
250                 /* for phis the copies are placed in the corresponding pred-block */
251                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
252         } else {
253                 /* a perm places the copy in the same block as it resides */
254                 loop = get_irn_loop(root_block);
255         }
256         if (loop) {
257                 int d = get_loop_depth(loop);
258                 cost = d*d;
259         }
260         return 1+cost;
261 }
262
263 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
264         int res;
265         ir_node *root_bl = get_nodes_block(root);
266         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
267         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
268
269         /* don't allow values smaller than one. */
270         return res < 1 ? 1 : res;
271 }
272
273
274 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
275         return 1;
276 }
277
278 /******************************************************************************
279    ____        _   _    _       _ _          _____ _
280   / __ \      | | | |  | |     (_) |        / ____| |
281  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
282  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
283  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
284   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
285         | |                                                            __/ |
286         |_|                                                           |___/
287  ******************************************************************************/
288
289 /**
290  * Determines a maximum weighted independent set with respect to
291  * the interference and conflict edges of all nodes in a qnode.
292  */
293 static int ou_max_ind_set_costs(unit_t *ou) {
294         be_chordal_env_t *chordal_env = ou->co->cenv;
295         ir_node **safe, **unsafe;
296         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
297         bitset_t *curr;
298         int max, pos, curr_weight, best_weight = 0;
299
300         /* assign the nodes into two groups.
301          * safe: node has no interference, hence it is in every max stable set.
302          * unsafe: node has an interference
303          */
304         safe = alloca((ou->node_count-1) * sizeof(*safe));
305         safe_costs = 0;
306         safe_count = 0;
307         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
308         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
309         unsafe_count = 0;
310         for(i=1; i<ou->node_count; ++i) {
311                 int is_safe = 1;
312                 for(o=1; o<ou->node_count; ++o) {
313                         if (i==o)
314                                 continue;
315                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
316                                 unsafe_costs[unsafe_count] = ou->costs[i];
317                                 unsafe[unsafe_count] = ou->nodes[i];
318                                 ++unsafe_count;
319                                 is_safe = 0;
320                                 break;
321                         }
322                 }
323                 if (is_safe) {
324                         safe_costs += ou->costs[i];
325                         safe[safe_count++] = ou->nodes[i];
326                 }
327         }
328
329
330         /* now compute the best set out of the unsafe nodes*/
331         if (unsafe_count > MIS_HEUR_TRIGGER) {
332                 bitset_t *best = bitset_alloca(unsafe_count);
333                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
334                 for (i=0; i<unsafe_count; ++i) {
335                         bitset_set(best, i);
336                         /* check if it is a stable set */
337                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
338                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
339                                         bitset_clear(best, i); /* clear the bit and try next one */
340                                         break;
341                                 }
342                 }
343                 /* compute the weight */
344                 bitset_foreach(best, pos)
345                         best_weight += unsafe_costs[pos];
346         } else {
347                 /* Exact Algorithm: Brute force */
348                 curr = bitset_alloca(unsafe_count);
349                 bitset_set_all(curr);
350                 while ((max = bitset_popcnt(curr)) != 0) {
351                         /* check if curr is a stable set */
352                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
353                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
354                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
355                                                         goto no_stable_set;
356
357                         /* if we arrive here, we have a stable set */
358                         /* compute the weigth of the stable set*/
359                         curr_weight = 0;
360                         bitset_foreach(curr, pos)
361                                 curr_weight += unsafe_costs[pos];
362
363                         /* any better ? */
364                         if (curr_weight > best_weight) {
365                                 best_weight = curr_weight;
366                         }
367
368         no_stable_set:
369                         bitset_minus1(curr);
370                 }
371         }
372
373         return safe_costs+best_weight;
374 }
375
376 static void co_collect_units(ir_node *irn, void *env) {
377         copy_opt_t *co = env;
378         unit_t *unit;
379
380         if (!is_curr_reg_class(co, irn))
381                 return;
382         if (!co_is_optimizable_root(co, irn))
383                 return;
384
385         /* Init a new unit */
386         unit = xcalloc(1, sizeof(*unit));
387         unit->co = co;
388         unit->node_count = 1;
389         INIT_LIST_HEAD(&unit->queue);
390
391         /* Phi with some/all of its arguments */
392         if (is_Reg_Phi(irn)) {
393                 int i, arity;
394
395                 /* init */
396                 arity = get_irn_arity(irn);
397                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
398                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
399                 unit->nodes[0] = irn;
400
401                 /* fill */
402                 for (i=0; i<arity; ++i) {
403                         int o, arg_pos;
404                         ir_node *arg = get_irn_n(irn, i);
405
406                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
407                         if (arg == irn)
408                                 continue;
409                         if (nodes_interfere(co->cenv, irn, arg)) {
410                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
411                                 continue;
412                         }
413
414                         /* Else insert the argument of the phi to the members of this ou */
415                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
416
417                         if (! arch_irn_is(co->aenv, arg, ignore)) {
418                                 /* Check if arg has occurred at a prior position in the arg/list */
419                                 arg_pos = 0;
420                                 for (o=1; o<unit->node_count; ++o) {
421                                         if (unit->nodes[o] == arg) {
422                                                 arg_pos = o;
423                                                 break;
424                                         }
425                                 }
426
427                                 if (!arg_pos) { /* a new argument */
428                                         /* insert node, set costs */
429                                         unit->nodes[unit->node_count] = arg;
430                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
431                                         unit->node_count++;
432                                 } else { /* arg has occured before in same phi */
433                                         /* increase costs for existing arg */
434                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
435                                 }
436                         }
437                 }
438                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
439                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
440         } else
441
442         /* Proj of a perm with corresponding arg */
443         if (is_Perm_Proj(co->aenv, irn)) {
444                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
445                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
446                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
447                 unit->node_count = 2;
448                 unit->nodes[0] = irn;
449                 unit->nodes[1] = get_Perm_src(irn);
450                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
451         } else {
452                 const arch_register_req_t *req =
453                         arch_get_register_req(co->aenv, irn, -1);
454
455                 /* Src == Tgt of a 2-addr-code instruction */
456                 if (is_2addr_code(req)) {
457                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
458                         if (!arch_irn_is(co->aenv, other, ignore) &&
459                                         !nodes_interfere(co->cenv, irn, other)) {
460                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
461                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
462                                 unit->node_count = 2;
463                                 unit->nodes[0] = irn;
464                                 unit->nodes[1] = other;
465                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
466                         }
467                 } else {
468                         assert(0 && "This is not an optimizable node!");
469                 }
470         }
471
472         /* Insert the new unit at a position according to its costs */
473         if (unit->node_count > 1) {
474                 int i;
475                 struct list_head *tmp;
476
477                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
478                 for(i=1; i<unit->node_count; ++i) {
479                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
480                         unit->all_nodes_costs += unit->costs[i];
481                 }
482
483                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
484                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
485                 /* Insert the new ou according to its sort_key */
486                 tmp = &co->units;
487                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
488                         tmp = tmp->next;
489                 list_add(&unit->units, tmp);
490         } else {
491                 free(unit);
492         }
493 }
494
495 #ifdef QUICK_AND_DIRTY_HACK
496
497 static int compare_ous(const void *k1, const void *k2) {
498         const unit_t *u1 = *((const unit_t **) k1);
499         const unit_t *u2 = *((const unit_t **) k2);
500         int i, o, u1_has_constr, u2_has_constr;
501         arch_register_req_t req;
502         const arch_env_t *aenv = u1->co->aenv;
503
504         /* Units with constraints come first */
505         u1_has_constr = 0;
506         for (i=0; i<u1->node_count; ++i) {
507                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
508                 if (arch_register_req_is(&req, limited)) {
509                         u1_has_constr = 1;
510                         break;
511                 }
512         }
513
514         u2_has_constr = 0;
515         for (i=0; i<u2->node_count; ++i) {
516                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
517                 if (arch_register_req_is(&req, limited)) {
518                         u2_has_constr = 1;
519                         break;
520                 }
521         }
522
523         if (u1_has_constr != u2_has_constr)
524                 return u2_has_constr - u1_has_constr;
525
526         /* Now check, whether the two units are connected */
527 #if 0
528         for (i=0; i<u1->node_count; ++i)
529                 for (o=0; o<u2->node_count; ++o)
530                         if (u1->nodes[i] == u2->nodes[o])
531                                 return 0;
532 #endif
533
534         /* After all, the sort key decides. Greater keys come first. */
535         return u2->sort_key - u1->sort_key;
536
537 }
538
539 /**
540  * Sort the ou's according to constraints and their sort_key
541  */
542 static void co_sort_units(copy_opt_t *co) {
543         int i, count = 0, costs;
544         unit_t *ou, **ous;
545
546         /* get the number of ous, remove them form the list and fill the array */
547         list_for_each_entry(unit_t, ou, &co->units, units)
548                 count++;
549         ous = alloca(count * sizeof(*ous));
550
551         costs = co_get_max_copy_costs(co);
552
553         i = 0;
554         list_for_each_entry(unit_t, ou, &co->units, units)
555                 ous[i++] = ou;
556
557         INIT_LIST_HEAD(&co->units);
558
559         assert(count == i && list_empty(&co->units));
560
561         for (i=0; i<count; ++i)
562                 ir_printf("%+F\n", ous[i]->nodes[0]);
563
564         qsort(ous, count, sizeof(*ous), compare_ous);
565
566         ir_printf("\n\n");
567         for (i=0; i<count; ++i)
568                 ir_printf("%+F\n", ous[i]->nodes[0]);
569
570         /* reinsert into list in correct order */
571         for (i=0; i<count; ++i)
572                 list_add_tail(&ous[i]->units, &co->units);
573
574         assert(costs == co_get_max_copy_costs(co));
575 }
576 #endif
577
578 void co_build_ou_structure(copy_opt_t *co) {
579         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
580         INIT_LIST_HEAD(&co->units);
581         irg_walk_graph(co->irg, co_collect_units, NULL, co);
582 #ifdef QUICK_AND_DIRTY_HACK
583         co_sort_units(co);
584 #endif
585 }
586
587 void co_free_ou_structure(copy_opt_t *co) {
588         unit_t *curr, *tmp;
589         ASSERT_OU_AVAIL(co);
590         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
591                 xfree(curr->nodes);
592                 xfree(curr->costs);
593                 xfree(curr);
594         }
595         co->units.next = NULL;
596 }
597
598 /* co_solve_heuristic() is implemented in becopyheur.c */
599
600 int co_get_max_copy_costs(const copy_opt_t *co) {
601         int i, res = 0;
602         unit_t *curr;
603
604         ASSERT_OU_AVAIL(co);
605
606         list_for_each_entry(unit_t, curr, &co->units, units) {
607                 res += curr->inevitable_costs;
608                 for (i=1; i<curr->node_count; ++i)
609                         res += curr->costs[i];
610         }
611         return res;
612 }
613
614 int co_get_inevit_copy_costs(const copy_opt_t *co) {
615         int res = 0;
616         unit_t *curr;
617
618         ASSERT_OU_AVAIL(co);
619
620         list_for_each_entry(unit_t, curr, &co->units, units)
621                 res += curr->inevitable_costs;
622         return res;
623 }
624
625 int co_get_copy_costs(const copy_opt_t *co) {
626         int i, res = 0;
627         unit_t *curr;
628
629         ASSERT_OU_AVAIL(co);
630
631         list_for_each_entry(unit_t, curr, &co->units, units) {
632                 int root_col = get_irn_col(co, curr->nodes[0]);
633                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
634                 res += curr->inevitable_costs;
635                 for (i=1; i<curr->node_count; ++i) {
636                         int arg_col = get_irn_col(co, curr->nodes[i]);
637                         if (root_col != arg_col) {
638                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
639                                 res += curr->costs[i];
640                         }
641                 }
642         }
643         return res;
644 }
645
646 int co_get_lower_bound(const copy_opt_t *co) {
647         int res = 0;
648         unit_t *curr;
649
650         ASSERT_OU_AVAIL(co);
651
652         list_for_each_entry(unit_t, curr, &co->units, units)
653                 res += curr->inevitable_costs + curr->min_nodes_costs;
654         return res;
655 }
656
657 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
658 {
659         bitset_t *seen = bitset_irg_malloc(co->irg);
660         affinity_node_t *an;
661
662         memset(stat, 0, sizeof(stat[0]));
663
664         /* count affinity edges. */
665         co_gs_foreach_aff_node(co, an) {
666                 neighb_t *neigh;
667                 stat->aff_nodes += 1;
668                 bitset_add_irn(seen, an->irn);
669                 co_gs_foreach_neighb(an, neigh) {
670                         if(!bitset_contains_irn(seen, neigh->irn)) {
671                                 stat->aff_edges += 1;
672                                 stat->max_costs += neigh->costs;
673
674                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
675                                         stat->costs += neigh->costs;
676                                         stat->unsatisfied_edges += 1;
677                                 }
678
679                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
680                                         stat->aff_int += 1;
681                                         stat->inevit_costs += neigh->costs;
682                                 }
683
684                         }
685                 }
686         }
687
688         bitset_free(seen);
689 }
690
691 /******************************************************************************
692    _____                 _        _____ _
693   / ____|               | |      / ____| |
694  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
695  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
696  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
697   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
698                   | |                                       __/ |
699                   |_|                                      |___/
700  ******************************************************************************/
701
702 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
703         const affinity_node_t *n1 = k1;
704         const affinity_node_t *n2 = k2;
705
706         return (n1->irn != n2->irn);
707 }
708
709 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
710         affinity_node_t new_node, *node;
711         neighb_t new_nbr, *nbr;
712         int allocnew;
713
714         new_node.irn        = n1;
715         new_node.degree     = 0;
716         new_node.neighbours = NULL;
717         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
718
719         allocnew = 1;
720         for (nbr = node->neighbours; nbr; nbr = nbr->next)
721                 if (nbr->irn == n2) {
722                         allocnew = 0;
723                         break;
724                 }
725
726         /* if we did not find n2 in n1's neighbourhood insert it */
727         if (allocnew) {
728                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
729                 nbr = obstack_finish(&co->obst);
730                 nbr->irn   = n2;
731                 nbr->costs = 0;
732                 nbr->next  = node->neighbours;
733                 node->neighbours = nbr;
734                 node->degree++;
735         }
736
737         /* now nbr points to n1's neighbour-entry of n2 */
738         nbr->costs += costs;
739 }
740
741 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
742         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
743                 add_edge(co, n1, n2, costs);
744                 add_edge(co, n2, n1, costs);
745         }
746 }
747
748 static void build_graph_walker(ir_node *irn, void *env) {
749         copy_opt_t *co = env;
750         int pos, max;
751         const arch_register_t *reg;
752
753         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
754                 return;
755
756         reg = arch_get_irn_register(co->aenv, irn);
757         if (arch_register_type_is(reg, ignore))
758                 return;
759
760         /* Phis */
761         if (is_Reg_Phi(irn))
762                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
763                         ir_node *arg = get_irn_n(irn, pos);
764                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
765                 }
766
767         /* Perms */
768         else if (is_Perm_Proj(co->aenv, irn)) {
769                 ir_node *arg = get_Perm_src(irn);
770                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
771         }
772
773         /* 2-address code */
774         else {
775                 const arch_register_req_t *req =
776                         arch_get_register_req(co->aenv, irn, -1);
777                 if (is_2addr_code(req)) {
778                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
779                         if (! arch_irn_is(co->aenv, other, ignore))
780                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
781                 }
782         }
783 }
784
785 void co_build_graph_structure(copy_opt_t *co) {
786         obstack_init(&co->obst);
787         co->nodes = new_set(compare_affinity_node_t, 32);
788
789         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
790 }
791
792 void co_free_graph_structure(copy_opt_t *co) {
793         ASSERT_GS_AVAIL(co);
794
795         del_set(co->nodes);
796         obstack_free(&co->obst, NULL);
797         co->nodes = NULL;
798 }
799
800 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
801
802 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
803         affinity_node_t new_node, *n;
804
805         ASSERT_GS_AVAIL(co);
806
807         new_node.irn = irn;
808         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
809         if (n) {
810                 return (n->degree > 0);
811         } else
812                 return 0;
813 }
814
815 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
816 {
817         be_ifg_t *ifg  = co->cenv->ifg;
818         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
819
820         ir_node *irn;
821         void *it, *nit;
822         int i, n, n_regs;
823
824         n_regs = 0;
825         for(i = 0; i < co->cls->n_regs; ++i) {
826                 const arch_register_t *reg = &co->cls->regs[i];
827                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
828         }
829
830         /*
831          * n contains the first node number.
832          * the values below n are the pre-colored register nodes
833          */
834
835         it  = be_ifg_nodes_iter_alloca(ifg);
836         nit = be_ifg_neighbours_iter_alloca(ifg);
837
838         n = n_regs;
839         be_ifg_foreach_node(ifg, it, irn) {
840                 if(!arch_irn_is(co->aenv, irn, ignore))
841                         set_irn_link(irn, INT_TO_PTR(n++));
842         }
843
844         fprintf(f, "%d %d\n", n, n_regs);
845
846         be_ifg_foreach_node(ifg, it, irn) {
847                 if(!arch_irn_is(co->aenv, irn, ignore)) {
848                         int idx            = PTR_TO_INT(get_irn_link(irn));
849                         affinity_node_t *a = get_affinity_info(co, irn);
850
851                         const arch_register_req_t *req;
852                         ir_node *adj;
853
854                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
855                         if(arch_register_req_is(req, limited)) {
856                                 for(i = 0; i < co->cls->n_regs; ++i) {
857                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
858                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
859                                 }
860                         }
861
862
863                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
864                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
865                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
866                                         if(idx < adj_idx)
867                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
868                                 }
869                         }
870
871                         if(a) {
872                                 neighb_t *n;
873
874                                 co_gs_foreach_neighb(a, n) {
875                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
876                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
877                                                 if(idx < n_idx)
878                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
879                                         }
880                                 }
881                         }
882                 }
883         }
884 }
885
886 typedef struct _appel_clique_walker_t {
887         ir_phase ph;
888         const copy_opt_t *co;
889         int curr_nr;
890         int node_count;
891         FILE *f;
892         int dumb;
893         int *color_map;
894         struct obstack obst;
895 } appel_clique_walker_t;
896
897 typedef struct _appel_block_info_t {
898         int *live_end_nr;
899         int *live_in_nr;
900         int *phi_nr;
901         ir_node **live_end;
902         ir_node **live_in;
903         ir_node **phi;
904         int n_live_end;
905         int n_live_in;
906         int n_phi;
907 } appel_block_info_t;
908
909 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
910 {
911 #if 0
912         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
913         int res = (int) freq;
914         return res == 0 ? 1 : res;
915 #else
916         ir_loop *loop = get_irn_loop(bl);
917         if(loop) {
918                 int d = get_loop_depth(loop);
919                 return 1 + d * d;
920         }
921         return 1;
922 #endif
923 }
924
925 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
926 {
927         appel_block_info_t *res = NULL;
928
929         if(is_Block(irn)) {
930                 appel_clique_walker_t *d = (void *) phase;
931                 res = phase_alloc(phase, sizeof(res[0]));
932                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
933                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
934                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
935                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
936                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
937                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
938         }
939
940         return res;
941 }
942
943 typedef struct _insn_list_t {
944         be_insn_t *insn;
945         struct list_head list;
946 } insn_list_t;
947
948 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
949 {
950         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
951         int i;
952
953         for(i = 0; i < bli->n_live_end; ++i)
954                 if(bli->live_end[i] == irn)
955                         return bli->live_end_nr[i];
956
957         return -1;
958 }
959
960 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
961 {
962         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
963         ir_node *irn;
964         int n_live;
965         int j;
966
967         n_live = 0;
968         foreach_pset(live, irn)
969                 live_arr[n_live++] = irn;
970
971         /* dump the live after clique */
972         if(!env->dumb) {
973                 for(j = 0; j < n_live; ++j) {
974                         int k;
975
976                         for(k = j + 1; k < n_live; ++k) {
977                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
978                         }
979                         fprintf(env->f, "\n");
980                 }
981         }
982
983         /* dump the affinities */
984         for(j = 0; !env->dumb && j < n_live; ++j) {
985                 ir_node *irn = live_arr[j];
986                 int old_nr = PTR_TO_INT(get_irn_link(irn));
987
988                 /* if the node was already live in the last insn dump the affinity */
989                 if(old_nr > start_nr) {
990                         int weight = appel_aff_weight(env, bl);
991                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
992                 }
993         }
994
995         /* set the current numbers into the link field. */
996         for(j = 0; j < n_live; ++j) {
997                 ir_node *irn = live_arr[j];
998                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
999         }
1000
1001         return curr_nr + n_live;
1002 }
1003
1004 static void appel_walker(ir_node *bl, void *data)
1005 {
1006         appel_clique_walker_t *env = data;
1007         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1008         struct obstack *obst       = &env->obst;
1009         void *base                 = obstack_base(obst);
1010         pset *live                 = pset_new_ptr_default();
1011         be_lv_t *lv                = env->co->cenv->birg->lv;
1012
1013         int n_insns  = 0;
1014         int n_nodes  = 0;
1015         int start_nr = env->curr_nr;
1016         int curr_nr  = start_nr;
1017
1018         be_insn_env_t insn_env;
1019         int i, j;
1020         ir_node *irn;
1021         be_insn_t **insns;
1022
1023         insn_env.aenv = env->co->aenv;
1024         insn_env.cls  = env->co->cls;
1025         insn_env.obst = obst;
1026         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1027
1028         /* Guess how many insns will be in this block. */
1029         sched_foreach(bl, irn)
1030                 n_nodes++;
1031
1032         bli->n_phi = 0;
1033         insns = malloc(n_nodes * sizeof(insns[0]));
1034
1035         /* Put all insns in an array. */
1036         irn = sched_first(bl);
1037         while(!sched_is_end(irn)) {
1038                 be_insn_t *insn;
1039                 insn = be_scan_insn(&insn_env, irn);
1040                 insns[n_insns++] = insn;
1041                 irn = insn->next_insn;
1042         }
1043
1044         DBG((dbg, LEVEL_2, "%+F\n", bl));
1045         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1046
1047         /* Generate the bad and ugly. */
1048         for(i = n_insns - 1; i >= 0; --i) {
1049                 be_insn_t *insn = insns[i];
1050
1051                 /* The first live set has to be saved in the block border set. */
1052                 if(i == n_insns - 1) {
1053                         j = 0;
1054                         foreach_pset(live, irn) {
1055                                 bli->live_end[j]    = irn;
1056                                 bli->live_end_nr[j] = curr_nr + j;
1057                                 ++j;
1058                         }
1059                         bli->n_live_end = j;
1060                 }
1061
1062                 if(!env->dumb) {
1063                         for(j = 0; j < insn->use_start; ++j) {
1064                                 ir_node *op   = insn->ops[j].carrier;
1065                                 bitset_t *adm = insn->ops[j].regs;
1066                                 int k;
1067                                 int nr;
1068
1069                                 if(!insn->ops[j].has_constraints)
1070                                         continue;
1071
1072                                 nr = 0;
1073                                 foreach_pset(live, irn) {
1074                                         if(irn == op) {
1075                                                 pset_break(live);
1076                                                 break;
1077                                         }
1078                                         ++nr;
1079                                 }
1080
1081                                 assert(nr < pset_count(live));
1082
1083                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1084                                         int mapped_col = env->color_map[k];
1085                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1086                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1087                                 }
1088                         }
1089                 }
1090
1091                 /* dump the clique and update the stuff. */
1092                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1093
1094                 /* remove all defs. */
1095                 for(j = 0; j < insn->use_start; ++j)
1096                         pset_remove_ptr(live, insn->ops[j].carrier);
1097
1098                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1099                         bli->phi[bli->n_phi]    = insn->irn;
1100                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1101                         bli->n_phi++;
1102                 }
1103
1104                 /* add all uses */
1105                 else
1106                         for(j = insn->use_start; j < insn->n_ops; ++j)
1107                                 pset_insert_ptr(live, insn->ops[j].carrier);
1108         }
1109
1110         /* print the start clique. */
1111         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1112
1113         i = 0;
1114         foreach_pset(live, irn) {
1115                 bli->live_in[i]    = irn;
1116                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1117                 ++i;
1118         }
1119         bli->n_live_in = i;
1120
1121         del_pset(live);
1122         free(insns);
1123         obstack_free(obst, base);
1124         env->curr_nr = curr_nr;
1125 }
1126
1127 static void appel_inter_block_aff(ir_node *bl, void *data)
1128 {
1129         appel_clique_walker_t *env = data;
1130         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1131
1132         int i, j, n;
1133
1134         for(i = 0; i < bli->n_live_in; ++i) {
1135                 ir_node *irn = bli->live_in[i];
1136
1137                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1138                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1139
1140                         int nr = appel_get_live_end_nr(env, pred, irn);
1141                         assert(nr >= 0);
1142                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1143                 }
1144         }
1145
1146         for(i = 0; i < bli->n_phi; ++i) {
1147                 ir_node *irn = bli->phi[i];
1148
1149                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1150                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1151                         ir_node *op = get_irn_n(irn, j);
1152
1153                         int nr = appel_get_live_end_nr(env, pred, op);
1154                         assert(nr >= 0);
1155                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1156                 }
1157         }
1158
1159 }
1160
1161 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1162 {
1163         int i;
1164         int n_colors;
1165         appel_clique_walker_t env;
1166         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1167         be_lv_t *lv = co->cenv->birg->lv;
1168
1169         be_liveness_recompute(lv);
1170         obstack_init(&env.obst);
1171         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1172         env.curr_nr = co->cls->n_regs;
1173         env.co = co;
1174         env.f = f;
1175
1176         bitset_copy(adm, co->cenv->ignore_colors);
1177         bitset_flip_all(adm);
1178
1179         /* Make color map. */
1180         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1181         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1182                 const arch_register_t *reg = &co->cls->regs[i];
1183                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1184         }
1185
1186         env.dumb = 1;
1187         env.curr_nr = n_colors;
1188         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1189         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1190
1191         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1192
1193         /* make the first k nodes interfere */
1194         for(i = 0; i < n_colors; ++i) {
1195                 int j;
1196                 for(j = i + 1; j < n_colors; ++j)
1197                         fprintf(f, "%d %d -1 ", i, j);
1198                 fprintf(f, "\n");
1199         }
1200
1201         env.dumb = 0;
1202         env.curr_nr = n_colors;
1203         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1204         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1205         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1206         obstack_free(&env.obst, NULL);
1207 }
1208
1209 /*
1210          ___ _____ ____   ____   ___ _____   ____                        _
1211         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1212          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1213          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1214         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1215                                                                   |_|            |___/
1216 */
1217
1218 static const char *get_dot_color_name(int col)
1219 {
1220         static const char *names[] = {
1221                 "blue",
1222                 "red",
1223                 "green",
1224                 "yellow",
1225                 "cyan",
1226                 "magenta",
1227                 "orange",
1228                 "chocolate",
1229                 "beige",
1230                 "navy",
1231                 "darkgreen",
1232                 "darkred",
1233                 "lightPink",
1234                 "chartreuse",
1235                 "lightskyblue",
1236                 "linen",
1237                 "pink",
1238                 "lightslateblue",
1239                 "mintcream",
1240                 "red",
1241                 "darkolivegreen",
1242                 "mediumblue",
1243                 "mistyrose",
1244                 "salmon",
1245                 "darkseagreen",
1246                 "mediumslateblue"
1247                 "moccasin",
1248                 "tomato",
1249                 "forestgreen",
1250                 "darkturquoise",
1251                 "palevioletred"
1252         };
1253
1254         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1255 }
1256
1257 typedef struct _co_ifg_dump_t {
1258         const copy_opt_t *co;
1259         unsigned flags;
1260 } co_ifg_dump_t;
1261
1262 static void ifg_dump_graph_attr(FILE *f, void *self)
1263 {
1264         fprintf(f, "overlap=scale");
1265 }
1266
1267 static int ifg_is_dump_node(void *self, ir_node *irn)
1268 {
1269         co_ifg_dump_t *cod = self;
1270         return !arch_irn_is(cod->co->aenv, irn, ignore);
1271 }
1272
1273 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1274 {
1275         co_ifg_dump_t *env         = self;
1276         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1277         const arch_register_req_t *req;
1278         int limited;
1279
1280         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1281         limited = arch_register_req_is(req, limited);
1282
1283         if(env->flags & CO_IFG_DUMP_LABELS) {
1284                 ir_fprintf(f, "label=\"%+F", irn);
1285
1286 #if 0
1287                 // TODO fix this...
1288                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1289                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1290                         req.limited(req.limited_env, bs);
1291                         ir_fprintf(f, "\\n%B", bs);
1292                 }
1293 #endif
1294                 ir_fprintf(f, "\" ");
1295         } else {
1296                 fprintf(f, "label=\"\" shape=point " );
1297         }
1298
1299         if(env->flags & CO_IFG_DUMP_SHAPE)
1300                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1301
1302         if(env->flags & CO_IFG_DUMP_COLORS)
1303                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1304 }
1305
1306 static void ifg_dump_at_end(FILE *file, void *self)
1307 {
1308         co_ifg_dump_t *env = self;
1309         affinity_node_t *a;
1310
1311         co_gs_foreach_aff_node(env->co, a) {
1312                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1313                 unsigned aidx = get_irn_idx(a->irn);
1314                 neighb_t *n;
1315
1316                 co_gs_foreach_neighb(a, n) {
1317                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1318                         unsigned nidx = get_irn_idx(n->irn);
1319
1320                         if(aidx < nidx) {
1321                                 const char *color = nr == ar ? "blue" : "red";
1322                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1323                                 if(env->flags & CO_IFG_DUMP_LABELS)
1324                                         fprintf(file, "label=\"%d\" ", n->costs);
1325                                 if(env->flags & CO_IFG_DUMP_COLORS)
1326                                         fprintf(file, "color=%s ", color);
1327                                 else
1328                                         fprintf(file, "style=dotted");
1329                                 fprintf(file, "];\n");
1330                         }
1331                 }
1332         }
1333 }
1334
1335
1336 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1337         ifg_is_dump_node,
1338         ifg_dump_graph_attr,
1339         ifg_dump_node_attr,
1340         NULL,
1341         NULL,
1342         ifg_dump_at_end
1343 };
1344
1345
1346
1347 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1348 {
1349         co_ifg_dump_t cod;
1350
1351         cod.co    = co;
1352         cod.flags = flags;
1353         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1354 }
1355
1356
1357 void co_solve_park_moon(copy_opt_t *opt)
1358 {
1359
1360 }
1361
1362 static int void_algo(copy_opt_t *co)
1363 {
1364         return 0;
1365 }
1366
1367 /*
1368                 _    _                  _ _   _
1369            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1370           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1371          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1372         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1373                            |___/
1374 */
1375
1376 typedef struct {
1377         co_algo_t  *algo;
1378         const char *name;
1379         int        can_improve_existing;
1380 } co_algo_info_t;
1381
1382 static co_algo_info_t algos[] = {
1383         { void_algo,               "none",  0 },
1384         { co_solve_heuristic,      "heur1", 0 },
1385         { co_solve_heuristic_new,  "heur2", 0 },
1386         { co_solve_heuristic_java, "heur3", 0 },
1387         { co_solve_heuristic_mst,  "heur4", 0 },
1388 #ifdef WITH_ILP
1389         { co_solve_ilp2,           "ilp",   1 },
1390 #endif
1391         { NULL,                    "",      0 }
1392 };
1393
1394 /*
1395     __  __       _         ____       _
1396    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1397    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1398    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1399    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1400
1401 */
1402
1403 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1404 {
1405         FILE *result;
1406         char buf[1024];
1407
1408         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1409         result = fopen(buf, "wt");
1410         if(result == NULL) {
1411                 panic("Couldn't open '%s' for writing.", buf);
1412         }
1413
1414         return result;
1415 }
1416
1417 void co_driver(be_chordal_env_t *cenv)
1418 {
1419         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1420         co_complete_stats_t before, after;
1421         copy_opt_t          *co;
1422         co_algo_t           *algo_func;
1423         int                 was_optimal = 0;
1424
1425         if (algo < 0 || algo >= CO_ALGO_LAST)
1426                 return;
1427
1428         co = new_copy_opt(cenv, cost_func);
1429         co_build_ou_structure(co);
1430         co_build_graph_structure(co);
1431
1432         co_complete_stats(co, &before);
1433
1434         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1435         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1436         be_stat_ev_ull("co_max_costs",    before.max_costs);
1437         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1438         be_stat_ev_ull("co_aff_int",      before.aff_int);
1439
1440         be_stat_ev_ull("co_init_costs",   before.costs);
1441         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1442
1443         /* Dump the interference graph in Appel's format. */
1444         if (dump_flags & DUMP_APPEL) {
1445                 FILE *f = my_open(cenv, "", ".apl");
1446                 co_dump_appel_graph(co, f);
1447                 fclose(f);
1448         }
1449
1450         if (dump_flags & DUMP_BEFORE) {
1451                 FILE *f = my_open(cenv, "", "-before.dot");
1452                 co_dump_ifg_dot(co, f, style_flags);
1453                 fclose(f);
1454         }
1455
1456         /* if the algo can improve results, provide an initial solution with heur3 */
1457         if (improve && algos[algo].can_improve_existing) {
1458                 co_complete_stats_t stats;
1459
1460                 /* produce a heuristic solution */
1461 #ifdef WITH_JVM
1462                 co_solve_heuristic_java(co);
1463 #else
1464                 co_solve_heuristic(co);
1465 #endif /* WITH_JVM */
1466
1467                 /* do the stats and provide the current costs */
1468                 co_complete_stats(co, &stats);
1469                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1470         }
1471
1472 #ifdef WITH_JVM
1473         /* start the JVM here so that it does not tamper the timing. */
1474         if (algo == CO_ALGO_HEUR3)
1475                 be_java_coal_start_jvm();
1476 #endif /* WITH_JVM */
1477
1478         algo_func = algos[algo].algo;
1479
1480         /* perform actual copy minimization */
1481         lc_timer_reset_and_start(timer);
1482         was_optimal = algo_func(co);
1483         lc_timer_stop(timer);
1484
1485         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1486         be_stat_ev_ull("co_optimal", was_optimal);
1487
1488         if (dump_flags & DUMP_AFTER) {
1489                 FILE *f = my_open(cenv, "", "-after.dot");
1490                 co_dump_ifg_dot(co, f, style_flags);
1491                 fclose(f);
1492         }
1493
1494         co_complete_stats(co, &after);
1495
1496         if (do_stats) {
1497                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1498                 ulong64 evitable          = after.costs     - after.inevit_costs;
1499
1500                 ir_printf("%30F ", cenv->irg);
1501                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1502
1503                 if(optimizable_costs > 0)
1504                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1505                 else
1506                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1507         }
1508
1509         be_stat_ev_ull("co_after_costs", after.costs);
1510         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1511
1512         co_free_graph_structure(co);
1513         co_free_ou_structure(co);
1514         free_copy_opt(co);
1515 }