5c5e169849aebdb768840cdceea6e96d63e49ebf
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * Author:      Daniel Grund
22  * Date:                12.04.2005
23  * Copyright:   (c) Universitaet Karlsruhe
24  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
25  */
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif
29
30 #include "execfreq.h"
31 #include "xmalloc.h"
32 #include "debug.h"
33 #include "pmap.h"
34 #include "raw_bitset.h"
35 #include "irgraph.h"
36 #include "irgwalk.h"
37 #include "irprog.h"
38 #include "irloop_t.h"
39 #include "iredges_t.h"
40 #include "phiclass.h"
41 #include "irbitset.h"
42 #include "irphase_t.h"
43 #include "irprintf_t.h"
44
45 #include "bemodule.h"
46 #include "bearch_t.h"
47 #include "benode_t.h"
48 #include "beutil.h"
49 #include "beifg_t.h"
50 #include "becopyopt_t.h"
51 #include "becopystat.h"
52 #include "belive_t.h"
53 #include "beinsn_t.h"
54 #include "besched_t.h"
55 #include "benodesets.h"
56 #include "bejavacoal.h"
57 #include "bestatevent.h"
58 #include "beirg_t.h"
59 #include "error.h"
60
61 #include <libcore/lc_timing.h>
62 #include <libcore/lc_opts.h>
63 #include <libcore/lc_opts_enum.h>
64
65 #define DUMP_BEFORE 1
66 #define DUMP_AFTER  2
67 #define DUMP_APPEL  4
68 #define DUMP_ALL    2 * DUMP_APPEL - 1
69
70 #define COST_FUNC_FREQ     1
71 #define COST_FUNC_LOOP     2
72 #define COST_FUNC_ALL_ONE  3
73
74 static unsigned   dump_flags  = 0;
75 static unsigned   style_flags = 0;
76 static unsigned   do_stats    = 0;
77 static cost_fct_t cost_func   = co_get_costs_exec_freq;
78 static unsigned   algo        = CO_ALGO_HEUR;
79 static int        improve     = 1;
80
81 static const lc_opt_enum_mask_items_t dump_items[] = {
82         { "before",  DUMP_BEFORE },
83         { "after",   DUMP_AFTER  },
84         { "appel",   DUMP_APPEL  },
85         { "all",     DUMP_ALL    },
86         { NULL,      0 }
87 };
88
89 static const lc_opt_enum_mask_items_t style_items[] = {
90         { "color",   CO_IFG_DUMP_COLORS },
91         { "labels",  CO_IFG_DUMP_LABELS },
92         { "constr",  CO_IFG_DUMP_CONSTR },
93         { "shape",   CO_IFG_DUMP_SHAPE  },
94         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
95         { NULL,      0 }
96 };
97
98 static const lc_opt_enum_mask_items_t algo_items[] = {
99         { "none",   CO_ALGO_NONE  },
100         { "heur",   CO_ALGO_HEUR  },
101         { "heur2",  CO_ALGO_HEUR2 },
102 #ifdef WITH_JVM
103         { "heur3",  CO_ALGO_HEUR3 },
104 #endif /* WITH_JVM */
105         { "heur4",  CO_ALGO_HEUR4 },
106 #ifdef WITH_ILP
107         { "ilp",    CO_ALGO_ILP   },
108 #endif /* WITH_ILP */
109         { NULL,     0 }
110 };
111
112 typedef int (*opt_funcptr)(void);
113
114 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
115         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
116         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
117         { "one",    (opt_funcptr) co_get_costs_all_one },
118         { NULL,     NULL }
119 };
120
121 static lc_opt_enum_mask_var_t dump_var = {
122         &dump_flags, dump_items
123 };
124
125 static lc_opt_enum_mask_var_t style_var = {
126         &style_flags, style_items
127 };
128
129 static lc_opt_enum_mask_var_t algo_var = {
130         &algo, algo_items
131 };
132
133 static lc_opt_enum_func_ptr_var_t cost_func_var = {
134         (opt_funcptr*) &cost_func, cost_func_items
135 };
136
137 static const lc_opt_table_entry_t options[] = {
138         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
139         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
140         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
141         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
142         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
143         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
144         { NULL }
145 };
146
147 /* Insert additional options registration functions here. */
148 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
149 extern void be_co2_register_options(lc_opt_entry_t *grp);
150 extern void be_co3_register_options(lc_opt_entry_t *grp);
151
152 void be_init_copycoal(void)
153 {
154         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
155         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
156         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
157         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
158
159         lc_opt_add_table(co_grp, options);
160 }
161
162 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
163
164 #undef QUICK_AND_DIRTY_HACK
165
166 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
167 {
168         if (env->ifg)
169                 return be_ifg_connected(env->ifg, a, b);
170         else
171                 return values_interfere(env->birg->lv, a, b);
172 }
173
174
175 /******************************************************************************
176     _____                           _
177    / ____|                         | |
178   | |  __  ___ _ __   ___ _ __ __ _| |
179   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
180   | |__| |  __/ | | |  __/ | | (_| | |
181    \_____|\___|_| |_|\___|_|  \__,_|_|
182
183  ******************************************************************************/
184
185 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
186
187
188 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
189 {
190         const char *s1, *s2, *s3;
191         int len;
192         copy_opt_t *co;
193
194         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
195
196         co = xcalloc(1, sizeof(*co));
197         co->cenv      = chordal_env;
198         co->aenv      = chordal_env->birg->main_env->arch_env;
199         co->irg       = chordal_env->irg;
200         co->cls       = chordal_env->cls;
201         co->get_costs = get_costs;
202
203         s1 = get_irp_prog_name();
204         s2 = get_entity_name(get_irg_entity(co->irg));
205         s3 = chordal_env->cls->name;
206         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
207         co->name = xmalloc(len);
208         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
209
210         return co;
211 }
212
213 void free_copy_opt(copy_opt_t *co) {
214         xfree(co->name);
215         free(co);
216 }
217
218 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
219         const arch_register_req_t *req;
220         const arch_register_t *reg;
221
222         if (arch_irn_is(co->aenv, irn, ignore))
223                 return 0;
224
225         reg = arch_get_irn_register(co->aenv, irn);
226         if (arch_register_type_is(reg, ignore))
227                 return 0;
228
229         req = arch_get_register_req(co->aenv, irn, -1);
230         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
231                 return 1;
232
233         return 0;
234 }
235
236 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
237         const ir_edge_t *edge;
238         const arch_register_t *reg;
239
240         assert(0 && "Is buggy and obsolete. Do not use");
241
242         if (arch_irn_is(co->aenv, irn, ignore))
243                 return 0;
244
245         reg = arch_get_irn_register(co->aenv, irn);
246         if (arch_register_type_is(reg, ignore))
247                 return 0;
248
249         foreach_out_edge(irn, edge) {
250                 ir_node *n = edge->src;
251
252                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
253                         const arch_register_req_t *req;
254                         req = arch_get_register_req(co->aenv, n, -1);
255
256                         if(is_Reg_Phi(n) ||
257                            is_Perm(co->aenv, n) ||
258                            (arch_register_req_is(req, should_be_same))) {
259                                 ir_node *other = get_irn_n(irn, req->other_same);
260                                 if(other == irn)
261                                         return 1;
262                         }
263                 }
264         }
265
266         return 0;
267 }
268
269 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
270         int cost = 0;
271         ir_loop *loop;
272         ir_node *root_block = get_nodes_block(root);
273
274         if (is_Phi(root)) {
275                 /* for phis the copies are placed in the corresponding pred-block */
276                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
277         } else {
278                 /* a perm places the copy in the same block as it resides */
279                 loop = get_irn_loop(root_block);
280         }
281         if (loop) {
282                 int d = get_loop_depth(loop);
283                 cost = d*d;
284         }
285         return 1+cost;
286 }
287
288 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
289         int res;
290         ir_node *root_bl = get_nodes_block(root);
291         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
292         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
293
294         /* don't allow values smaller than one. */
295         return res < 1 ? 1 : res;
296 }
297
298
299 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
300         return 1;
301 }
302
303 /******************************************************************************
304    ____        _   _    _       _ _          _____ _
305   / __ \      | | | |  | |     (_) |        / ____| |
306  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
307  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
308  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
309   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
310         | |                                                            __/ |
311         |_|                                                           |___/
312  ******************************************************************************/
313
314 /**
315  * Determines a maximum weighted independent set with respect to
316  * the interference and conflict edges of all nodes in a qnode.
317  */
318 static int ou_max_ind_set_costs(unit_t *ou) {
319         be_chordal_env_t *chordal_env = ou->co->cenv;
320         ir_node **safe, **unsafe;
321         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
322         bitset_t *curr;
323         int max, pos, curr_weight, best_weight = 0;
324
325         /* assign the nodes into two groups.
326          * safe: node has no interference, hence it is in every max stable set.
327          * unsafe: node has an interference
328          */
329         safe = alloca((ou->node_count-1) * sizeof(*safe));
330         safe_costs = 0;
331         safe_count = 0;
332         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
333         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
334         unsafe_count = 0;
335         for(i=1; i<ou->node_count; ++i) {
336                 int is_safe = 1;
337                 for(o=1; o<ou->node_count; ++o) {
338                         if (i==o)
339                                 continue;
340                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
341                                 unsafe_costs[unsafe_count] = ou->costs[i];
342                                 unsafe[unsafe_count] = ou->nodes[i];
343                                 ++unsafe_count;
344                                 is_safe = 0;
345                                 break;
346                         }
347                 }
348                 if (is_safe) {
349                         safe_costs += ou->costs[i];
350                         safe[safe_count++] = ou->nodes[i];
351                 }
352         }
353
354
355         /* now compute the best set out of the unsafe nodes*/
356         if (unsafe_count > MIS_HEUR_TRIGGER) {
357                 bitset_t *best = bitset_alloca(unsafe_count);
358                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
359                 for (i=0; i<unsafe_count; ++i) {
360                         bitset_set(best, i);
361                         /* check if it is a stable set */
362                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
363                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
364                                         bitset_clear(best, i); /* clear the bit and try next one */
365                                         break;
366                                 }
367                 }
368                 /* compute the weight */
369                 bitset_foreach(best, pos)
370                         best_weight += unsafe_costs[pos];
371         } else {
372                 /* Exact Algorithm: Brute force */
373                 curr = bitset_alloca(unsafe_count);
374                 bitset_set_all(curr);
375                 while ((max = bitset_popcnt(curr)) != 0) {
376                         /* check if curr is a stable set */
377                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
378                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
379                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
380                                                         goto no_stable_set;
381
382                         /* if we arrive here, we have a stable set */
383                         /* compute the weigth of the stable set*/
384                         curr_weight = 0;
385                         bitset_foreach(curr, pos)
386                                 curr_weight += unsafe_costs[pos];
387
388                         /* any better ? */
389                         if (curr_weight > best_weight) {
390                                 best_weight = curr_weight;
391                         }
392
393         no_stable_set:
394                         bitset_minus1(curr);
395                 }
396         }
397
398         return safe_costs+best_weight;
399 }
400
401 static void co_collect_units(ir_node *irn, void *env) {
402         copy_opt_t *co = env;
403         unit_t *unit;
404
405         if (!is_curr_reg_class(co, irn))
406                 return;
407         if (!co_is_optimizable_root(co, irn))
408                 return;
409
410         /* Init a new unit */
411         unit = xcalloc(1, sizeof(*unit));
412         unit->co = co;
413         unit->node_count = 1;
414         INIT_LIST_HEAD(&unit->queue);
415
416         /* Phi with some/all of its arguments */
417         if (is_Reg_Phi(irn)) {
418                 int i, arity;
419
420                 /* init */
421                 arity = get_irn_arity(irn);
422                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
423                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
424                 unit->nodes[0] = irn;
425
426                 /* fill */
427                 for (i=0; i<arity; ++i) {
428                         int o, arg_pos;
429                         ir_node *arg = get_irn_n(irn, i);
430
431                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
432                         if (arg == irn)
433                                 continue;
434                         if (nodes_interfere(co->cenv, irn, arg)) {
435                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
436                                 continue;
437                         }
438
439                         /* Else insert the argument of the phi to the members of this ou */
440                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
441
442                         if (! arch_irn_is(co->aenv, arg, ignore)) {
443                                 /* Check if arg has occurred at a prior position in the arg/list */
444                                 arg_pos = 0;
445                                 for (o=1; o<unit->node_count; ++o) {
446                                         if (unit->nodes[o] == arg) {
447                                                 arg_pos = o;
448                                                 break;
449                                         }
450                                 }
451
452                                 if (!arg_pos) { /* a new argument */
453                                         /* insert node, set costs */
454                                         unit->nodes[unit->node_count] = arg;
455                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
456                                         unit->node_count++;
457                                 } else { /* arg has occured before in same phi */
458                                         /* increase costs for existing arg */
459                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
460                                 }
461                         }
462                 }
463                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
464                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
465         } else
466
467         /* Proj of a perm with corresponding arg */
468         if (is_Perm_Proj(co->aenv, irn)) {
469                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
470                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
471                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
472                 unit->node_count = 2;
473                 unit->nodes[0] = irn;
474                 unit->nodes[1] = get_Perm_src(irn);
475                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
476         } else {
477                 const arch_register_req_t *req =
478                         arch_get_register_req(co->aenv, irn, -1);
479
480                 /* Src == Tgt of a 2-addr-code instruction */
481                 if (is_2addr_code(req)) {
482                         ir_node *other = get_irn_n(irn, req->other_same);
483                         if (!arch_irn_is(co->aenv, other, ignore) &&
484                                         !nodes_interfere(co->cenv, irn, other)) {
485                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
486                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
487                                 unit->node_count = 2;
488                                 unit->nodes[0] = irn;
489                                 unit->nodes[1] = other;
490                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
491                         }
492                 } else {
493                         assert(0 && "This is not an optimizable node!");
494                 }
495         }
496
497         /* Insert the new unit at a position according to its costs */
498         if (unit->node_count > 1) {
499                 int i;
500                 struct list_head *tmp;
501
502                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
503                 for(i=1; i<unit->node_count; ++i) {
504                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
505                         unit->all_nodes_costs += unit->costs[i];
506                 }
507
508                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
509                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
510                 /* Insert the new ou according to its sort_key */
511                 tmp = &co->units;
512                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
513                         tmp = tmp->next;
514                 list_add(&unit->units, tmp);
515         } else {
516                 free(unit);
517         }
518 }
519
520 #ifdef QUICK_AND_DIRTY_HACK
521
522 static int compare_ous(const void *k1, const void *k2) {
523         const unit_t *u1 = *((const unit_t **) k1);
524         const unit_t *u2 = *((const unit_t **) k2);
525         int i, o, u1_has_constr, u2_has_constr;
526         arch_register_req_t req;
527         const arch_env_t *aenv = u1->co->aenv;
528
529         /* Units with constraints come first */
530         u1_has_constr = 0;
531         for (i=0; i<u1->node_count; ++i) {
532                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
533                 if (arch_register_req_is(&req, limited)) {
534                         u1_has_constr = 1;
535                         break;
536                 }
537         }
538
539         u2_has_constr = 0;
540         for (i=0; i<u2->node_count; ++i) {
541                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
542                 if (arch_register_req_is(&req, limited)) {
543                         u2_has_constr = 1;
544                         break;
545                 }
546         }
547
548         if (u1_has_constr != u2_has_constr)
549                 return u2_has_constr - u1_has_constr;
550
551         /* Now check, whether the two units are connected */
552 #if 0
553         for (i=0; i<u1->node_count; ++i)
554                 for (o=0; o<u2->node_count; ++o)
555                         if (u1->nodes[i] == u2->nodes[o])
556                                 return 0;
557 #endif
558
559         /* After all, the sort key decides. Greater keys come first. */
560         return u2->sort_key - u1->sort_key;
561
562 }
563
564 /**
565  * Sort the ou's according to constraints and their sort_key
566  */
567 static void co_sort_units(copy_opt_t *co) {
568         int i, count = 0, costs;
569         unit_t *ou, **ous;
570
571         /* get the number of ous, remove them form the list and fill the array */
572         list_for_each_entry(unit_t, ou, &co->units, units)
573                 count++;
574         ous = alloca(count * sizeof(*ous));
575
576         costs = co_get_max_copy_costs(co);
577
578         i = 0;
579         list_for_each_entry(unit_t, ou, &co->units, units)
580                 ous[i++] = ou;
581
582         INIT_LIST_HEAD(&co->units);
583
584         assert(count == i && list_empty(&co->units));
585
586         for (i=0; i<count; ++i)
587                 ir_printf("%+F\n", ous[i]->nodes[0]);
588
589         qsort(ous, count, sizeof(*ous), compare_ous);
590
591         ir_printf("\n\n");
592         for (i=0; i<count; ++i)
593                 ir_printf("%+F\n", ous[i]->nodes[0]);
594
595         /* reinsert into list in correct order */
596         for (i=0; i<count; ++i)
597                 list_add_tail(&ous[i]->units, &co->units);
598
599         assert(costs == co_get_max_copy_costs(co));
600 }
601 #endif
602
603 void co_build_ou_structure(copy_opt_t *co) {
604         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
605         INIT_LIST_HEAD(&co->units);
606         irg_walk_graph(co->irg, co_collect_units, NULL, co);
607 #ifdef QUICK_AND_DIRTY_HACK
608         co_sort_units(co);
609 #endif
610 }
611
612 void co_free_ou_structure(copy_opt_t *co) {
613         unit_t *curr, *tmp;
614         ASSERT_OU_AVAIL(co);
615         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
616                 xfree(curr->nodes);
617                 xfree(curr->costs);
618                 xfree(curr);
619         }
620         co->units.next = NULL;
621 }
622
623 /* co_solve_heuristic() is implemented in becopyheur.c */
624
625 int co_get_max_copy_costs(const copy_opt_t *co) {
626         int i, res = 0;
627         unit_t *curr;
628
629         ASSERT_OU_AVAIL(co);
630
631         list_for_each_entry(unit_t, curr, &co->units, units) {
632                 res += curr->inevitable_costs;
633                 for (i=1; i<curr->node_count; ++i)
634                         res += curr->costs[i];
635         }
636         return res;
637 }
638
639 int co_get_inevit_copy_costs(const copy_opt_t *co) {
640         int res = 0;
641         unit_t *curr;
642
643         ASSERT_OU_AVAIL(co);
644
645         list_for_each_entry(unit_t, curr, &co->units, units)
646                 res += curr->inevitable_costs;
647         return res;
648 }
649
650 int co_get_copy_costs(const copy_opt_t *co) {
651         int i, res = 0;
652         unit_t *curr;
653
654         ASSERT_OU_AVAIL(co);
655
656         list_for_each_entry(unit_t, curr, &co->units, units) {
657                 int root_col = get_irn_col(co, curr->nodes[0]);
658                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
659                 res += curr->inevitable_costs;
660                 for (i=1; i<curr->node_count; ++i) {
661                         int arg_col = get_irn_col(co, curr->nodes[i]);
662                         if (root_col != arg_col) {
663                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
664                                 res += curr->costs[i];
665                         }
666                 }
667         }
668         return res;
669 }
670
671 int co_get_lower_bound(const copy_opt_t *co) {
672         int res = 0;
673         unit_t *curr;
674
675         ASSERT_OU_AVAIL(co);
676
677         list_for_each_entry(unit_t, curr, &co->units, units)
678                 res += curr->inevitable_costs + curr->min_nodes_costs;
679         return res;
680 }
681
682 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
683 {
684         bitset_t *seen = bitset_irg_malloc(co->irg);
685         affinity_node_t *an;
686
687         memset(stat, 0, sizeof(stat[0]));
688
689         /* count affinity edges. */
690         co_gs_foreach_aff_node(co, an) {
691                 neighb_t *neigh;
692                 stat->aff_nodes += 1;
693                 bitset_add_irn(seen, an->irn);
694                 co_gs_foreach_neighb(an, neigh) {
695                         if(!bitset_contains_irn(seen, neigh->irn)) {
696                                 stat->aff_edges += 1;
697                                 stat->max_costs += neigh->costs;
698
699                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
700                                         stat->costs += neigh->costs;
701                                         stat->unsatisfied_edges += 1;
702                                 }
703
704                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
705                                         stat->aff_int += 1;
706                                         stat->inevit_costs += neigh->costs;
707                                 }
708
709                         }
710                 }
711         }
712
713         bitset_free(seen);
714 }
715
716 /******************************************************************************
717    _____                 _        _____ _
718   / ____|               | |      / ____| |
719  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
720  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
721  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
722   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
723                   | |                                       __/ |
724                   |_|                                      |___/
725  ******************************************************************************/
726
727 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
728         const affinity_node_t *n1 = k1;
729         const affinity_node_t *n2 = k2;
730
731         return (n1->irn != n2->irn);
732 }
733
734 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
735         affinity_node_t new_node, *node;
736         neighb_t new_nbr, *nbr;
737         int allocnew;
738
739         new_node.irn        = n1;
740         new_node.degree     = 0;
741         new_node.neighbours = NULL;
742         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
743
744         allocnew = 1;
745         for (nbr = node->neighbours; nbr; nbr = nbr->next)
746                 if (nbr->irn == n2) {
747                         allocnew = 0;
748                         break;
749                 }
750
751         /* if we did not find n2 in n1's neighbourhood insert it */
752         if (allocnew) {
753                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
754                 nbr = obstack_finish(&co->obst);
755                 nbr->irn   = n2;
756                 nbr->costs = 0;
757                 nbr->next  = node->neighbours;
758                 node->neighbours = nbr;
759                 node->degree++;
760         }
761
762         /* now nbr points to n1's neighbour-entry of n2 */
763         nbr->costs += costs;
764 }
765
766 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
767         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
768                 add_edge(co, n1, n2, costs);
769                 add_edge(co, n2, n1, costs);
770         }
771 }
772
773 static void build_graph_walker(ir_node *irn, void *env) {
774         copy_opt_t *co = env;
775         int pos, max;
776         const arch_register_t *reg;
777
778         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
779                 return;
780
781         reg = arch_get_irn_register(co->aenv, irn);
782         if (arch_register_type_is(reg, ignore))
783                 return;
784
785         /* Phis */
786         if (is_Reg_Phi(irn))
787                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
788                         ir_node *arg = get_irn_n(irn, pos);
789                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
790                 }
791
792         /* Perms */
793         else if (is_Perm_Proj(co->aenv, irn)) {
794                 ir_node *arg = get_Perm_src(irn);
795                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
796         }
797
798         /* 2-address code */
799         else {
800                 const arch_register_req_t *req =
801                         arch_get_register_req(co->aenv, irn, -1);
802                 if (is_2addr_code(req)) {
803                         ir_node *other = get_irn_n(irn, req->other_same);
804                         if(!arch_irn_is(co->aenv, other, ignore))
805                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
806                 }
807         }
808 }
809
810 void co_build_graph_structure(copy_opt_t *co) {
811         obstack_init(&co->obst);
812         co->nodes = new_set(compare_affinity_node_t, 32);
813
814         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
815 }
816
817 void co_free_graph_structure(copy_opt_t *co) {
818         ASSERT_GS_AVAIL(co);
819
820         del_set(co->nodes);
821         obstack_free(&co->obst, NULL);
822         co->nodes = NULL;
823 }
824
825 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
826
827 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
828         affinity_node_t new_node, *n;
829
830         ASSERT_GS_AVAIL(co);
831
832         new_node.irn = irn;
833         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
834         if (n) {
835                 return (n->degree > 0);
836         } else
837                 return 0;
838 }
839
840 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
841 {
842         be_ifg_t *ifg  = co->cenv->ifg;
843         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
844
845         ir_node *irn;
846         void *it, *nit;
847         int i, n, n_regs;
848
849         n_regs = 0;
850         for(i = 0; i < co->cls->n_regs; ++i) {
851                 const arch_register_t *reg = &co->cls->regs[i];
852                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
853         }
854
855         /*
856          * n contains the first node number.
857          * the values below n are the pre-colored register nodes
858          */
859
860         it  = be_ifg_nodes_iter_alloca(ifg);
861         nit = be_ifg_neighbours_iter_alloca(ifg);
862
863         n = n_regs;
864         be_ifg_foreach_node(ifg, it, irn) {
865                 if(!arch_irn_is(co->aenv, irn, ignore))
866                         set_irn_link(irn, INT_TO_PTR(n++));
867         }
868
869         fprintf(f, "%d %d\n", n, n_regs);
870
871         be_ifg_foreach_node(ifg, it, irn) {
872                 if(!arch_irn_is(co->aenv, irn, ignore)) {
873                         int idx            = PTR_TO_INT(get_irn_link(irn));
874                         affinity_node_t *a = get_affinity_info(co, irn);
875
876                         const arch_register_req_t *req;
877                         ir_node *adj;
878
879                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
880                         if(arch_register_req_is(req, limited)) {
881                                 for(i = 0; i < co->cls->n_regs; ++i) {
882                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
883                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
884                                 }
885                         }
886
887
888                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
889                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
890                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
891                                         if(idx < adj_idx)
892                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
893                                 }
894                         }
895
896                         if(a) {
897                                 neighb_t *n;
898
899                                 co_gs_foreach_neighb(a, n) {
900                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
901                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
902                                                 if(idx < n_idx)
903                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
904                                         }
905                                 }
906                         }
907                 }
908         }
909 }
910
911 typedef struct _appel_clique_walker_t {
912         ir_phase ph;
913         const copy_opt_t *co;
914         int curr_nr;
915         int node_count;
916         FILE *f;
917         int dumb;
918         int *color_map;
919         struct obstack obst;
920 } appel_clique_walker_t;
921
922 typedef struct _appel_block_info_t {
923         int *live_end_nr;
924         int *live_in_nr;
925         int *phi_nr;
926         ir_node **live_end;
927         ir_node **live_in;
928         ir_node **phi;
929         int n_live_end;
930         int n_live_in;
931         int n_phi;
932 } appel_block_info_t;
933
934 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
935 {
936 #if 0
937         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
938         int res = (int) freq;
939         return res == 0 ? 1 : res;
940 #else
941         ir_loop *loop = get_irn_loop(bl);
942         if(loop) {
943                 int d = get_loop_depth(loop);
944                 return 1 + d * d;
945         }
946         return 1;
947 #endif
948 }
949
950 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
951 {
952         appel_block_info_t *res = NULL;
953
954         if(is_Block(irn)) {
955                 appel_clique_walker_t *d = (void *) phase;
956                 res = phase_alloc(phase, sizeof(res[0]));
957                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
958                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
959                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
960                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
961                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
962                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
963         }
964
965         return res;
966 }
967
968 typedef struct _insn_list_t {
969         be_insn_t *insn;
970         struct list_head list;
971 } insn_list_t;
972
973 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
974 {
975         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
976         int i;
977
978         for(i = 0; i < bli->n_live_end; ++i)
979                 if(bli->live_end[i] == irn)
980                         return bli->live_end_nr[i];
981
982         return -1;
983 }
984
985 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
986 {
987         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
988         ir_node *irn;
989         int n_live;
990         int j;
991
992         n_live = 0;
993         foreach_pset(live, irn)
994                 live_arr[n_live++] = irn;
995
996         /* dump the live after clique */
997         if(!env->dumb) {
998                 for(j = 0; j < n_live; ++j) {
999                         int k;
1000
1001                         for(k = j + 1; k < n_live; ++k) {
1002                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
1003                         }
1004                         fprintf(env->f, "\n");
1005                 }
1006         }
1007
1008         /* dump the affinities */
1009         for(j = 0; !env->dumb && j < n_live; ++j) {
1010                 ir_node *irn = live_arr[j];
1011                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1012
1013                 /* if the node was already live in the last insn dump the affinity */
1014                 if(old_nr > start_nr) {
1015                         int weight = appel_aff_weight(env, bl);
1016                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1017                 }
1018         }
1019
1020         /* set the current numbers into the link field. */
1021         for(j = 0; j < n_live; ++j) {
1022                 ir_node *irn = live_arr[j];
1023                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1024         }
1025
1026         return curr_nr + n_live;
1027 }
1028
1029 static void appel_walker(ir_node *bl, void *data)
1030 {
1031         appel_clique_walker_t *env = data;
1032         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1033         struct obstack *obst       = &env->obst;
1034         void *base                 = obstack_base(obst);
1035         pset *live                 = pset_new_ptr_default();
1036         be_lv_t *lv                = env->co->cenv->birg->lv;
1037
1038         int n_insns  = 0;
1039         int n_nodes  = 0;
1040         int start_nr = env->curr_nr;
1041         int curr_nr  = start_nr;
1042
1043         be_insn_env_t insn_env;
1044         int i, j;
1045         ir_node *irn;
1046         be_insn_t **insns;
1047
1048         insn_env.aenv = env->co->aenv;
1049         insn_env.cls  = env->co->cls;
1050         insn_env.obst = obst;
1051         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1052
1053         /* Guess how many insns will be in this block. */
1054         sched_foreach(bl, irn)
1055                 n_nodes++;
1056
1057         bli->n_phi = 0;
1058         insns = malloc(n_nodes * sizeof(insns[0]));
1059
1060         /* Put all insns in an array. */
1061         irn = sched_first(bl);
1062         while(!sched_is_end(irn)) {
1063                 be_insn_t *insn;
1064                 insn = be_scan_insn(&insn_env, irn);
1065                 insns[n_insns++] = insn;
1066                 irn = insn->next_insn;
1067         }
1068
1069         DBG((dbg, LEVEL_2, "%+F\n", bl));
1070         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1071
1072         /* Generate the bad and ugly. */
1073         for(i = n_insns - 1; i >= 0; --i) {
1074                 be_insn_t *insn = insns[i];
1075
1076                 /* The first live set has to be saved in the block border set. */
1077                 if(i == n_insns - 1) {
1078                         j = 0;
1079                         foreach_pset(live, irn) {
1080                                 bli->live_end[j]    = irn;
1081                                 bli->live_end_nr[j] = curr_nr + j;
1082                                 ++j;
1083                         }
1084                         bli->n_live_end = j;
1085                 }
1086
1087                 if(!env->dumb) {
1088                         for(j = 0; j < insn->use_start; ++j) {
1089                                 ir_node *op   = insn->ops[j].carrier;
1090                                 bitset_t *adm = insn->ops[j].regs;
1091                                 int k;
1092                                 int nr;
1093
1094                                 if(!insn->ops[j].has_constraints)
1095                                         continue;
1096
1097                                 nr = 0;
1098                                 foreach_pset(live, irn) {
1099                                         if(irn == op) {
1100                                                 pset_break(live);
1101                                                 break;
1102                                         }
1103                                         ++nr;
1104                                 }
1105
1106                                 assert(nr < pset_count(live));
1107
1108                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1109                                         int mapped_col = env->color_map[k];
1110                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1111                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1112                                 }
1113                         }
1114                 }
1115
1116                 /* dump the clique and update the stuff. */
1117                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1118
1119                 /* remove all defs. */
1120                 for(j = 0; j < insn->use_start; ++j)
1121                         pset_remove_ptr(live, insn->ops[j].carrier);
1122
1123                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1124                         bli->phi[bli->n_phi]    = insn->irn;
1125                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1126                         bli->n_phi++;
1127                 }
1128
1129                 /* add all uses */
1130                 else
1131                         for(j = insn->use_start; j < insn->n_ops; ++j)
1132                                 pset_insert_ptr(live, insn->ops[j].carrier);
1133         }
1134
1135         /* print the start clique. */
1136         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1137
1138         i = 0;
1139         foreach_pset(live, irn) {
1140                 bli->live_in[i]    = irn;
1141                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1142                 ++i;
1143         }
1144         bli->n_live_in = i;
1145
1146         del_pset(live);
1147         free(insns);
1148         obstack_free(obst, base);
1149         env->curr_nr = curr_nr;
1150 }
1151
1152 static void appel_inter_block_aff(ir_node *bl, void *data)
1153 {
1154         appel_clique_walker_t *env = data;
1155         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1156
1157         int i, j, n;
1158
1159         for(i = 0; i < bli->n_live_in; ++i) {
1160                 ir_node *irn = bli->live_in[i];
1161
1162                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1163                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1164
1165                         int nr = appel_get_live_end_nr(env, pred, irn);
1166                         assert(nr >= 0);
1167                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1168                 }
1169         }
1170
1171         for(i = 0; i < bli->n_phi; ++i) {
1172                 ir_node *irn = bli->phi[i];
1173
1174                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1175                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1176                         ir_node *op = get_irn_n(irn, j);
1177
1178                         int nr = appel_get_live_end_nr(env, pred, op);
1179                         assert(nr >= 0);
1180                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1181                 }
1182         }
1183
1184 }
1185
1186 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1187 {
1188         int i;
1189         int n_colors;
1190         appel_clique_walker_t env;
1191         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1192         be_lv_t *lv = co->cenv->birg->lv;
1193
1194         be_liveness_recompute(lv);
1195         obstack_init(&env.obst);
1196         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1197         env.curr_nr = co->cls->n_regs;
1198         env.co = co;
1199         env.f = f;
1200
1201         bitset_copy(adm, co->cenv->ignore_colors);
1202         bitset_flip_all(adm);
1203
1204         /* Make color map. */
1205         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1206         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1207                 const arch_register_t *reg = &co->cls->regs[i];
1208                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1209         }
1210
1211         env.dumb = 1;
1212         env.curr_nr = n_colors;
1213         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1214         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1215
1216         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1217
1218         /* make the first k nodes interfere */
1219         for(i = 0; i < n_colors; ++i) {
1220                 int j;
1221                 for(j = i + 1; j < n_colors; ++j)
1222                         fprintf(f, "%d %d -1 ", i, j);
1223                 fprintf(f, "\n");
1224         }
1225
1226         env.dumb = 0;
1227         env.curr_nr = n_colors;
1228         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1229         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1230         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1231         obstack_free(&env.obst, NULL);
1232 }
1233
1234 /*
1235          ___ _____ ____   ____   ___ _____   ____                        _
1236         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1237          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1238          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1239         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1240                                                                   |_|            |___/
1241 */
1242
1243 static const char *get_dot_color_name(int col)
1244 {
1245         static const char *names[] = {
1246                 "blue",
1247                 "red",
1248                 "green",
1249                 "yellow",
1250                 "cyan",
1251                 "magenta",
1252                 "orange",
1253                 "chocolate",
1254                 "beige",
1255                 "navy",
1256                 "darkgreen",
1257                 "darkred",
1258                 "lightPink",
1259                 "chartreuse",
1260                 "lightskyblue",
1261                 "linen",
1262                 "pink",
1263                 "lightslateblue",
1264                 "mintcream",
1265                 "red",
1266                 "darkolivegreen",
1267                 "mediumblue",
1268                 "mistyrose",
1269                 "salmon",
1270                 "darkseagreen",
1271                 "mediumslateblue"
1272                 "moccasin",
1273                 "tomato",
1274                 "forestgreen",
1275                 "darkturquoise",
1276                 "palevioletred"
1277         };
1278
1279         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1280 }
1281
1282 typedef struct _co_ifg_dump_t {
1283         const copy_opt_t *co;
1284         unsigned flags;
1285 } co_ifg_dump_t;
1286
1287 static void ifg_dump_graph_attr(FILE *f, void *self)
1288 {
1289         fprintf(f, "overlap=scale");
1290 }
1291
1292 static int ifg_is_dump_node(void *self, ir_node *irn)
1293 {
1294         co_ifg_dump_t *cod = self;
1295         return !arch_irn_is(cod->co->aenv, irn, ignore);
1296 }
1297
1298 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1299 {
1300         co_ifg_dump_t *env         = self;
1301         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1302         const arch_register_req_t *req;
1303         int limited;
1304
1305         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1306         limited = arch_register_req_is(req, limited);
1307
1308         if(env->flags & CO_IFG_DUMP_LABELS) {
1309                 ir_fprintf(f, "label=\"%+F", irn);
1310
1311 #if 0
1312                 // TODO fix this...
1313                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1314                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1315                         req.limited(req.limited_env, bs);
1316                         ir_fprintf(f, "\\n%B", bs);
1317                 }
1318 #endif
1319                 ir_fprintf(f, "\" ");
1320         } else {
1321                 fprintf(f, "label=\"\" shape=point " );
1322         }
1323
1324         if(env->flags & CO_IFG_DUMP_SHAPE)
1325                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1326
1327         if(env->flags & CO_IFG_DUMP_COLORS)
1328                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1329 }
1330
1331 static void ifg_dump_at_end(FILE *file, void *self)
1332 {
1333         co_ifg_dump_t *env = self;
1334         affinity_node_t *a;
1335
1336         co_gs_foreach_aff_node(env->co, a) {
1337                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1338                 unsigned aidx = get_irn_idx(a->irn);
1339                 neighb_t *n;
1340
1341                 co_gs_foreach_neighb(a, n) {
1342                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1343                         unsigned nidx = get_irn_idx(n->irn);
1344
1345                         if(aidx < nidx) {
1346                                 const char *color = nr == ar ? "blue" : "red";
1347                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1348                                 if(env->flags & CO_IFG_DUMP_LABELS)
1349                                         fprintf(file, "label=\"%d\" ", n->costs);
1350                                 if(env->flags & CO_IFG_DUMP_COLORS)
1351                                         fprintf(file, "color=%s ", color);
1352                                 else
1353                                         fprintf(file, "style=dotted");
1354                                 fprintf(file, "];\n");
1355                         }
1356                 }
1357         }
1358 }
1359
1360
1361 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1362         ifg_is_dump_node,
1363         ifg_dump_graph_attr,
1364         ifg_dump_node_attr,
1365         NULL,
1366         NULL,
1367         ifg_dump_at_end
1368 };
1369
1370
1371
1372 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1373 {
1374         co_ifg_dump_t cod;
1375
1376         cod.co    = co;
1377         cod.flags = flags;
1378         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1379 }
1380
1381
1382 void co_solve_park_moon(copy_opt_t *opt)
1383 {
1384
1385 }
1386
1387 static int void_algo(copy_opt_t *co)
1388 {
1389         return 0;
1390 }
1391
1392 /*
1393                 _    _                  _ _   _
1394            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1395           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1396          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1397         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1398                            |___/
1399 */
1400
1401 typedef struct {
1402         co_algo_t  *algo;
1403         const char *name;
1404         int        can_improve_existing;
1405 } co_algo_info_t;
1406
1407 static co_algo_info_t algos[] = {
1408         { void_algo,               "none",  0 },
1409         { co_solve_heuristic,      "heur1", 0 },
1410         { co_solve_heuristic_new,  "heur2", 0 },
1411         { co_solve_heuristic_java, "heur3", 0 },
1412         { co_solve_heuristic_mst,  "heur4", 0 },
1413 #ifdef WITH_ILP
1414         { co_solve_ilp2,           "ilp",   1 },
1415 #endif
1416         { NULL,                    "",      0 }
1417 };
1418
1419 /*
1420     __  __       _         ____       _
1421    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1422    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1423    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1424    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1425
1426 */
1427
1428 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1429 {
1430         FILE *result;
1431         char buf[1024];
1432
1433         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1434         result = fopen(buf, "wt");
1435         if(result == NULL) {
1436                 panic("Couldn't open '%s' for writing.", buf);
1437         }
1438
1439         return result;
1440 }
1441
1442 void co_driver(be_chordal_env_t *cenv)
1443 {
1444         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1445         co_complete_stats_t before, after;
1446         copy_opt_t          *co;
1447         co_algo_t           *algo_func;
1448         int                 was_optimal = 0;
1449
1450         if (algo < 0 || algo >= CO_ALGO_LAST)
1451                 return;
1452
1453         co = new_copy_opt(cenv, cost_func);
1454         co_build_ou_structure(co);
1455         co_build_graph_structure(co);
1456
1457         co_complete_stats(co, &before);
1458
1459         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1460         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1461         be_stat_ev_ull("co_max_costs",    before.max_costs);
1462         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1463         be_stat_ev_ull("co_aff_int",      before.aff_int);
1464
1465         be_stat_ev_ull("co_init_costs",   before.costs);
1466         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1467
1468         /* Dump the interference graph in Appel's format. */
1469         if (dump_flags & DUMP_APPEL) {
1470                 FILE *f = my_open(cenv, "", ".apl");
1471                 co_dump_appel_graph(co, f);
1472                 fclose(f);
1473         }
1474
1475         if (dump_flags & DUMP_BEFORE) {
1476                 FILE *f = my_open(cenv, "", "-before.dot");
1477                 co_dump_ifg_dot(co, f, style_flags);
1478                 fclose(f);
1479         }
1480
1481         /* if the algo can improve results, provide an initial solution with heur3 */
1482         if (improve && algos[algo].can_improve_existing) {
1483                 co_complete_stats_t stats;
1484
1485                 /* produce a heuristic solution */
1486 #ifdef WITH_JVM
1487                 co_solve_heuristic_java(co);
1488 #else
1489                 co_solve_heuristic(co);
1490 #endif /* WITH_JVM */
1491
1492                 /* do the stats and provide the current costs */
1493                 co_complete_stats(co, &stats);
1494                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1495         }
1496
1497 #ifdef WITH_JVM
1498         /* start the JVM here so that it does not tamper the timing. */
1499         if (algo == CO_ALGO_HEUR3)
1500                 be_java_coal_start_jvm();
1501 #endif /* WITH_JVM */
1502
1503         algo_func = algos[algo].algo;
1504
1505         /* perform actual copy minimization */
1506         lc_timer_reset_and_start(timer);
1507         was_optimal = algo_func(co);
1508         lc_timer_stop(timer);
1509
1510         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1511         be_stat_ev_ull("co_optimal", was_optimal);
1512
1513         if (dump_flags & DUMP_AFTER) {
1514                 FILE *f = my_open(cenv, "", "-after.dot");
1515                 co_dump_ifg_dot(co, f, style_flags);
1516                 fclose(f);
1517         }
1518
1519         co_complete_stats(co, &after);
1520
1521         if (do_stats) {
1522                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1523                 ulong64 evitable          = after.costs     - after.inevit_costs;
1524
1525                 ir_printf("%30F ", cenv->irg);
1526                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1527
1528                 if(optimizable_costs > 0)
1529                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1530                 else
1531                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1532         }
1533
1534         be_stat_ev_ull("co_after_costs", after.costs);
1535         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1536
1537         co_free_graph_structure(co);
1538         co_free_ou_structure(co);
1539         free_copy_opt(co);
1540 }