add some more comments
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110 #ifdef WITH_JVM
111         { "heur3",  CO_ALGO_HEUR3 },
112 #endif /* WITH_JVM */
113         { "heur4",  CO_ALGO_HEUR4 },
114 #ifdef WITH_ILP
115         { "ilp",    CO_ALGO_ILP   },
116 #endif /* WITH_ILP */
117         { NULL,     0 }
118 };
119
120 typedef int (*opt_funcptr)(void);
121
122 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
123         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
124         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
125         { "one",    (opt_funcptr) co_get_costs_all_one },
126         { NULL,     NULL }
127 };
128
129 static lc_opt_enum_mask_var_t dump_var = {
130         &dump_flags, dump_items
131 };
132
133 static lc_opt_enum_mask_var_t style_var = {
134         &style_flags, style_items
135 };
136
137 static lc_opt_enum_mask_var_t algo_var = {
138         &algo, algo_items
139 };
140
141 static lc_opt_enum_func_ptr_var_t cost_func_var = {
142         (opt_funcptr*) &cost_func, cost_func_items
143 };
144
145 static const lc_opt_table_entry_t options[] = {
146         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
147         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
148         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
149         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
150         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
151         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
152         LC_OPT_LAST
153 };
154
155 /* Insert additional options registration functions here. */
156 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
157 extern void be_co2_register_options(lc_opt_entry_t *grp);
158 extern void be_co3_register_options(lc_opt_entry_t *grp);
159
160 void be_init_copycoal(void)
161 {
162         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
163         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
164         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
165         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
166
167         lc_opt_add_table(co_grp, options);
168 }
169
170 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
171
172 #undef QUICK_AND_DIRTY_HACK
173
174 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
175 {
176         if (env->ifg)
177                 return be_ifg_connected(env->ifg, a, b);
178         else
179                 return values_interfere(env->birg, a, b);
180 }
181
182
183 /******************************************************************************
184     _____                           _
185    / ____|                         | |
186   | |  __  ___ _ __   ___ _ __ __ _| |
187   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
188   | |__| |  __/ | | |  __/ | | (_| | |
189    \_____|\___|_| |_|\___|_|  \__,_|_|
190
191  ******************************************************************************/
192
193 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
194
195
196 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
197 {
198         const char *s1, *s2, *s3;
199         int len;
200         copy_opt_t *co;
201
202         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
203
204         co = xcalloc(1, sizeof(*co));
205         co->cenv      = chordal_env;
206         co->aenv      = chordal_env->birg->main_env->arch_env;
207         co->irg       = chordal_env->irg;
208         co->cls       = chordal_env->cls;
209         co->get_costs = get_costs;
210
211         s1 = get_irp_prog_name();
212         s2 = get_entity_name(get_irg_entity(co->irg));
213         s3 = chordal_env->cls->name;
214         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
215         co->name = xmalloc(len);
216         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
217
218         return co;
219 }
220
221 void free_copy_opt(copy_opt_t *co) {
222         xfree(co->name);
223         free(co);
224 }
225
226 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
227         const arch_register_req_t *req;
228         const arch_register_t *reg;
229
230         if (arch_irn_is(co->aenv, irn, ignore))
231                 return 0;
232
233         reg = arch_get_irn_register(co->aenv, irn);
234         if (arch_register_type_is(reg, ignore))
235                 return 0;
236
237         req = arch_get_register_req(co->aenv, irn, -1);
238         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
245         int cost = 0;
246         ir_loop *loop;
247         ir_node *root_block = get_nodes_block(root);
248         (void) co;
249         (void) arg;
250
251         if (is_Phi(root)) {
252                 /* for phis the copies are placed in the corresponding pred-block */
253                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
254         } else {
255                 /* a perm places the copy in the same block as it resides */
256                 loop = get_irn_loop(root_block);
257         }
258         if (loop) {
259                 int d = get_loop_depth(loop);
260                 cost = d*d;
261         }
262         return 1+cost;
263 }
264
265 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
266         int res;
267         ir_node *root_bl = get_nodes_block(root);
268         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
269         (void) arg;
270         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
271
272         /* don't allow values smaller than one. */
273         return res < 1 ? 1 : res;
274 }
275
276
277 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
278         (void) co;
279         (void) root;
280         (void) arg;
281         (void) pos;
282         return 1;
283 }
284
285 /******************************************************************************
286    ____        _   _    _       _ _          _____ _
287   / __ \      | | | |  | |     (_) |        / ____| |
288  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
289  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
290  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
291   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
292         | |                                                            __/ |
293         |_|                                                           |___/
294  ******************************************************************************/
295
296 /**
297  * Determines a maximum weighted independent set with respect to
298  * the interference and conflict edges of all nodes in a qnode.
299  */
300 static int ou_max_ind_set_costs(unit_t *ou) {
301         be_chordal_env_t *chordal_env = ou->co->cenv;
302         ir_node **safe, **unsafe;
303         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
304         bitset_t *curr;
305         bitset_pos_t pos;
306         int max, curr_weight, best_weight = 0;
307
308         /* assign the nodes into two groups.
309          * safe: node has no interference, hence it is in every max stable set.
310          * unsafe: node has an interference
311          */
312         safe = alloca((ou->node_count-1) * sizeof(*safe));
313         safe_costs = 0;
314         safe_count = 0;
315         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
316         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
317         unsafe_count = 0;
318         for(i=1; i<ou->node_count; ++i) {
319                 int is_safe = 1;
320                 for(o=1; o<ou->node_count; ++o) {
321                         if (i==o)
322                                 continue;
323                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
324                                 unsafe_costs[unsafe_count] = ou->costs[i];
325                                 unsafe[unsafe_count] = ou->nodes[i];
326                                 ++unsafe_count;
327                                 is_safe = 0;
328                                 break;
329                         }
330                 }
331                 if (is_safe) {
332                         safe_costs += ou->costs[i];
333                         safe[safe_count++] = ou->nodes[i];
334                 }
335         }
336
337
338         /* now compute the best set out of the unsafe nodes*/
339         if (unsafe_count > MIS_HEUR_TRIGGER) {
340                 bitset_t *best = bitset_alloca(unsafe_count);
341                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
342                 for (i=0; i<unsafe_count; ++i) {
343                         bitset_set(best, i);
344                         /* check if it is a stable set */
345                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
346                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
347                                         bitset_clear(best, i); /* clear the bit and try next one */
348                                         break;
349                                 }
350                 }
351                 /* compute the weight */
352                 bitset_foreach(best, pos)
353                         best_weight += unsafe_costs[pos];
354         } else {
355                 /* Exact Algorithm: Brute force */
356                 curr = bitset_alloca(unsafe_count);
357                 bitset_set_all(curr);
358                 while ((max = bitset_popcnt(curr)) != 0) {
359                         /* check if curr is a stable set */
360                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
361                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
362                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
363                                                         goto no_stable_set;
364
365                         /* if we arrive here, we have a stable set */
366                         /* compute the weigth of the stable set*/
367                         curr_weight = 0;
368                         bitset_foreach(curr, pos)
369                                 curr_weight += unsafe_costs[pos];
370
371                         /* any better ? */
372                         if (curr_weight > best_weight) {
373                                 best_weight = curr_weight;
374                         }
375
376         no_stable_set:
377                         bitset_minus1(curr);
378                 }
379         }
380
381         return safe_costs+best_weight;
382 }
383
384 static void co_collect_units(ir_node *irn, void *env) {
385         copy_opt_t *co = env;
386         unit_t *unit;
387
388         if (!is_curr_reg_class(co, irn))
389                 return;
390         if (!co_is_optimizable_root(co, irn))
391                 return;
392
393         /* Init a new unit */
394         unit = xcalloc(1, sizeof(*unit));
395         unit->co = co;
396         unit->node_count = 1;
397         INIT_LIST_HEAD(&unit->queue);
398
399         /* Phi with some/all of its arguments */
400         if (is_Reg_Phi(irn)) {
401                 int i, arity;
402
403                 /* init */
404                 arity = get_irn_arity(irn);
405                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
406                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
407                 unit->nodes[0] = irn;
408
409                 /* fill */
410                 for (i=0; i<arity; ++i) {
411                         int o, arg_pos;
412                         ir_node *arg = get_irn_n(irn, i);
413
414                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
415                         if (arg == irn)
416                                 continue;
417                         if (nodes_interfere(co->cenv, irn, arg)) {
418                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
419                                 continue;
420                         }
421
422                         /* Else insert the argument of the phi to the members of this ou */
423                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
424
425                         if (! arch_irn_is(co->aenv, arg, ignore)) {
426                                 /* Check if arg has occurred at a prior position in the arg/list */
427                                 arg_pos = 0;
428                                 for (o=1; o<unit->node_count; ++o) {
429                                         if (unit->nodes[o] == arg) {
430                                                 arg_pos = o;
431                                                 break;
432                                         }
433                                 }
434
435                                 if (!arg_pos) { /* a new argument */
436                                         /* insert node, set costs */
437                                         unit->nodes[unit->node_count] = arg;
438                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
439                                         unit->node_count++;
440                                 } else { /* arg has occurred before in same phi */
441                                         /* increase costs for existing arg */
442                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
443                                 }
444                         }
445                 }
446                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
447                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
448         } else if (is_Perm_Proj(co->aenv, irn)) {
449                 /* Proj of a perm with corresponding arg */
450                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
451                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
452                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
453                 unit->node_count = 2;
454                 unit->nodes[0] = irn;
455                 unit->nodes[1] = get_Perm_src(irn);
456                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
457         } else {
458                 const arch_register_req_t *req =
459                         arch_get_register_req(co->aenv, irn, -1);
460
461                 /* Src == Tgt of a 2-addr-code instruction */
462                 if (is_2addr_code(req)) {
463                         ir_node *other  = get_irn_n(skip_Proj(irn), req->other_same[0]);
464                         ir_node *other2 = NULL;
465                         int      count;
466
467                         if (arch_irn_is(co->aenv, other, ignore) ||
468                                         nodes_interfere(co->cenv, irn, other)) {
469                                 other = NULL;
470                         }
471                         if (req->other_same[1] != -1) {
472                                 other2 = get_irn_n(skip_Proj(irn), req->other_same[1]);
473                                 if (arch_irn_is(co->aenv, other2, ignore) ||
474                                                 nodes_interfere(co->cenv, irn, other2)) {
475                                         other2 = NULL;
476                                 }
477                         }
478                         count = 1 + (other != NULL) + (other2 != NULL && other != other2);
479
480                         if (count > 1) {
481                                 int i = 0;
482
483                                 unit->nodes = xmalloc(count * sizeof(*unit->nodes));
484                                 unit->costs = xmalloc(count * sizeof(*unit->costs));
485                                 unit->node_count = count;
486                                 unit->nodes[i] = irn;
487                                 if (other != NULL) {
488                                         ++i;
489                                         unit->nodes[i] = other;
490                                         unit->costs[i] = co->get_costs(co, irn, other, -1);
491                                 }
492                                 if (other2 != NULL) {
493                                         if (other == other2) {
494                                                 unit->costs[i] += co->get_costs(co, irn, other2, -1);
495                                         } else {
496                                                 ++i;
497                                                 unit->nodes[i] = other2;
498                                                 unit->costs[i] = co->get_costs(co, irn, other2, -1);
499                                         }
500                                 }
501                         }
502                 } else {
503                         assert(0 && "This is not an optimizable node!");
504                 }
505         }
506
507         /* Insert the new unit at a position according to its costs */
508         if (unit->node_count > 1) {
509                 int i;
510                 struct list_head *tmp;
511
512                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
513                 for(i=1; i<unit->node_count; ++i) {
514                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
515                         unit->all_nodes_costs += unit->costs[i];
516                 }
517
518                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
519                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
520                 /* Insert the new ou according to its sort_key */
521                 tmp = &co->units;
522                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
523                         tmp = tmp->next;
524                 list_add(&unit->units, tmp);
525         } else {
526                 free(unit);
527         }
528 }
529
530 #ifdef QUICK_AND_DIRTY_HACK
531
532 static int compare_ous(const void *k1, const void *k2) {
533         const unit_t *u1 = *((const unit_t **) k1);
534         const unit_t *u2 = *((const unit_t **) k2);
535         int i, o, u1_has_constr, u2_has_constr;
536         arch_register_req_t req;
537         const arch_env_t *aenv = u1->co->aenv;
538
539         /* Units with constraints come first */
540         u1_has_constr = 0;
541         for (i=0; i<u1->node_count; ++i) {
542                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
543                 if (arch_register_req_is(&req, limited)) {
544                         u1_has_constr = 1;
545                         break;
546                 }
547         }
548
549         u2_has_constr = 0;
550         for (i=0; i<u2->node_count; ++i) {
551                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
552                 if (arch_register_req_is(&req, limited)) {
553                         u2_has_constr = 1;
554                         break;
555                 }
556         }
557
558         if (u1_has_constr != u2_has_constr)
559                 return u2_has_constr - u1_has_constr;
560
561         /* Now check, whether the two units are connected */
562 #if 0
563         for (i=0; i<u1->node_count; ++i)
564                 for (o=0; o<u2->node_count; ++o)
565                         if (u1->nodes[i] == u2->nodes[o])
566                                 return 0;
567 #endif
568
569         /* After all, the sort key decides. Greater keys come first. */
570         return u2->sort_key - u1->sort_key;
571
572 }
573
574 /**
575  * Sort the ou's according to constraints and their sort_key
576  */
577 static void co_sort_units(copy_opt_t *co) {
578         int i, count = 0, costs;
579         unit_t *ou, **ous;
580
581         /* get the number of ous, remove them form the list and fill the array */
582         list_for_each_entry(unit_t, ou, &co->units, units)
583                 count++;
584         ous = alloca(count * sizeof(*ous));
585
586         costs = co_get_max_copy_costs(co);
587
588         i = 0;
589         list_for_each_entry(unit_t, ou, &co->units, units)
590                 ous[i++] = ou;
591
592         INIT_LIST_HEAD(&co->units);
593
594         assert(count == i && list_empty(&co->units));
595
596         for (i=0; i<count; ++i)
597                 ir_printf("%+F\n", ous[i]->nodes[0]);
598
599         qsort(ous, count, sizeof(*ous), compare_ous);
600
601         ir_printf("\n\n");
602         for (i=0; i<count; ++i)
603                 ir_printf("%+F\n", ous[i]->nodes[0]);
604
605         /* reinsert into list in correct order */
606         for (i=0; i<count; ++i)
607                 list_add_tail(&ous[i]->units, &co->units);
608
609         assert(costs == co_get_max_copy_costs(co));
610 }
611 #endif
612
613 void co_build_ou_structure(copy_opt_t *co) {
614         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
615         INIT_LIST_HEAD(&co->units);
616         irg_walk_graph(co->irg, co_collect_units, NULL, co);
617 #ifdef QUICK_AND_DIRTY_HACK
618         co_sort_units(co);
619 #endif
620 }
621
622 void co_free_ou_structure(copy_opt_t *co) {
623         unit_t *curr, *tmp;
624         ASSERT_OU_AVAIL(co);
625         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
626                 xfree(curr->nodes);
627                 xfree(curr->costs);
628                 xfree(curr);
629         }
630         co->units.next = NULL;
631 }
632
633 /* co_solve_heuristic() is implemented in becopyheur.c */
634
635 int co_get_max_copy_costs(const copy_opt_t *co) {
636         int i, res = 0;
637         unit_t *curr;
638
639         ASSERT_OU_AVAIL(co);
640
641         list_for_each_entry(unit_t, curr, &co->units, units) {
642                 res += curr->inevitable_costs;
643                 for (i=1; i<curr->node_count; ++i)
644                         res += curr->costs[i];
645         }
646         return res;
647 }
648
649 int co_get_inevit_copy_costs(const copy_opt_t *co) {
650         int res = 0;
651         unit_t *curr;
652
653         ASSERT_OU_AVAIL(co);
654
655         list_for_each_entry(unit_t, curr, &co->units, units)
656                 res += curr->inevitable_costs;
657         return res;
658 }
659
660 int co_get_copy_costs(const copy_opt_t *co) {
661         int i, res = 0;
662         unit_t *curr;
663
664         ASSERT_OU_AVAIL(co);
665
666         list_for_each_entry(unit_t, curr, &co->units, units) {
667                 int root_col = get_irn_col(co, curr->nodes[0]);
668                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
669                 res += curr->inevitable_costs;
670                 for (i=1; i<curr->node_count; ++i) {
671                         int arg_col = get_irn_col(co, curr->nodes[i]);
672                         if (root_col != arg_col) {
673                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
674                                 res += curr->costs[i];
675                         }
676                 }
677         }
678         return res;
679 }
680
681 int co_get_lower_bound(const copy_opt_t *co) {
682         int res = 0;
683         unit_t *curr;
684
685         ASSERT_OU_AVAIL(co);
686
687         list_for_each_entry(unit_t, curr, &co->units, units)
688                 res += curr->inevitable_costs + curr->min_nodes_costs;
689         return res;
690 }
691
692 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
693 {
694         bitset_t *seen = bitset_irg_malloc(co->irg);
695         affinity_node_t *an;
696
697         memset(stat, 0, sizeof(stat[0]));
698
699         /* count affinity edges. */
700         co_gs_foreach_aff_node(co, an) {
701                 neighb_t *neigh;
702                 stat->aff_nodes += 1;
703                 bitset_add_irn(seen, an->irn);
704                 co_gs_foreach_neighb(an, neigh) {
705                         if(!bitset_contains_irn(seen, neigh->irn)) {
706                                 stat->aff_edges += 1;
707                                 stat->max_costs += neigh->costs;
708
709                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
710                                         stat->costs += neigh->costs;
711                                         stat->unsatisfied_edges += 1;
712                                 }
713
714                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
715                                         stat->aff_int += 1;
716                                         stat->inevit_costs += neigh->costs;
717                                 }
718
719                         }
720                 }
721         }
722
723         bitset_free(seen);
724 }
725
726 /******************************************************************************
727    _____                 _        _____ _
728   / ____|               | |      / ____| |
729  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
730  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
731  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
732   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
733                   | |                                       __/ |
734                   |_|                                      |___/
735  ******************************************************************************/
736
737 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
738         const affinity_node_t *n1 = k1;
739         const affinity_node_t *n2 = k2;
740         (void) size;
741
742         return (n1->irn != n2->irn);
743 }
744
745 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
746         affinity_node_t new_node, *node;
747         neighb_t        *nbr;
748         int             allocnew = 1;
749
750         new_node.irn        = n1;
751         new_node.degree     = 0;
752         new_node.neighbours = NULL;
753         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
754
755         for (nbr = node->neighbours; nbr; nbr = nbr->next)
756                 if (nbr->irn == n2) {
757                         allocnew = 0;
758                         break;
759                 }
760
761         /* if we did not find n2 in n1's neighbourhood insert it */
762         if (allocnew) {
763                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
764                 nbr->irn   = n2;
765                 nbr->costs = 0;
766                 nbr->next  = node->neighbours;
767
768                 node->neighbours = nbr;
769                 node->degree++;
770         }
771
772         /* now nbr points to n1's neighbour-entry of n2 */
773         nbr->costs += costs;
774 }
775
776 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
777         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
778                 add_edge(co, n1, n2, costs);
779                 add_edge(co, n2, n1, costs);
780         }
781 }
782
783 static void build_graph_walker(ir_node *irn, void *env) {
784         copy_opt_t *co = env;
785         int pos, max;
786         const arch_register_t *reg;
787
788         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
789                 return;
790
791         reg = arch_get_irn_register(co->aenv, irn);
792         if (arch_register_type_is(reg, ignore))
793                 return;
794
795         if (is_Reg_Phi(irn)) { /* Phis */
796                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
797                         ir_node *arg = get_irn_n(irn, pos);
798                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
799                 }
800         }
801         else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
802                 ir_node *arg = get_Perm_src(irn);
803                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
804         }
805         else { /* 2-address code */
806                 const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
807                 if (is_2addr_code(req)) {
808                         const int *i;
809                         for (i = req->other_same; i != ENDOF(req->other_same); ++i) {
810                                 ir_node *other;
811
812                                 if (*i == -1) break;
813
814                                 other = get_irn_n(skip_Proj(irn), *i);
815                                 if (! arch_irn_is(co->aenv, other, ignore))
816                                         add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
817                         }
818                 }
819         }
820 }
821
822 void co_build_graph_structure(copy_opt_t *co) {
823         obstack_init(&co->obst);
824         co->nodes = new_set(compare_affinity_node_t, 32);
825
826         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
827 }
828
829 void co_free_graph_structure(copy_opt_t *co) {
830         ASSERT_GS_AVAIL(co);
831
832         del_set(co->nodes);
833         obstack_free(&co->obst, NULL);
834         co->nodes = NULL;
835 }
836
837 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
838
839 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
840         affinity_node_t new_node, *n;
841
842         ASSERT_GS_AVAIL(co);
843
844         new_node.irn = irn;
845         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
846         if (n) {
847                 return (n->degree > 0);
848         } else
849                 return 0;
850 }
851
852 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
853 {
854         be_ifg_t *ifg  = co->cenv->ifg;
855         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
856
857         ir_node *irn;
858         void *it, *nit;
859         int n, n_regs;
860         unsigned i;
861
862         n_regs = 0;
863         for(i = 0; i < co->cls->n_regs; ++i) {
864                 const arch_register_t *reg = &co->cls->regs[i];
865                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
866         }
867
868         /*
869          * n contains the first node number.
870          * the values below n are the pre-colored register nodes
871          */
872
873         it  = be_ifg_nodes_iter_alloca(ifg);
874         nit = be_ifg_neighbours_iter_alloca(ifg);
875
876         n = n_regs;
877         be_ifg_foreach_node(ifg, it, irn) {
878                 if(!arch_irn_is(co->aenv, irn, ignore))
879                         set_irn_link(irn, INT_TO_PTR(n++));
880         }
881
882         fprintf(f, "%d %d\n", n, n_regs);
883
884         be_ifg_foreach_node(ifg, it, irn) {
885                 if(!arch_irn_is(co->aenv, irn, ignore)) {
886                         int idx            = PTR_TO_INT(get_irn_link(irn));
887                         affinity_node_t *a = get_affinity_info(co, irn);
888
889                         const arch_register_req_t *req;
890                         ir_node *adj;
891
892                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
893                         if(arch_register_req_is(req, limited)) {
894                                 for(i = 0; i < co->cls->n_regs; ++i) {
895                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
896                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
897                                 }
898                         }
899
900
901                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
902                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
903                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
904                                         if(idx < adj_idx)
905                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
906                                 }
907                         }
908
909                         if(a) {
910                                 neighb_t *n;
911
912                                 co_gs_foreach_neighb(a, n) {
913                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
914                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
915                                                 if(idx < n_idx)
916                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
917                                         }
918                                 }
919                         }
920                 }
921         }
922 }
923
924 typedef struct _appel_clique_walker_t {
925         ir_phase ph;
926         const copy_opt_t *co;
927         int curr_nr;
928         int node_count;
929         FILE *f;
930         int dumb;
931         int *color_map;
932         struct obstack obst;
933 } appel_clique_walker_t;
934
935 typedef struct _appel_block_info_t {
936         int *live_end_nr;
937         int *live_in_nr;
938         int *phi_nr;
939         ir_node **live_end;
940         ir_node **live_in;
941         ir_node **phi;
942         int n_live_end;
943         int n_live_in;
944         int n_phi;
945 } appel_block_info_t;
946
947 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
948 {
949 #if 0
950         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
951         int res = (int) freq;
952         return res == 0 ? 1 : res;
953 #else
954         ir_loop *loop = get_irn_loop(bl);
955         (void) env;
956         if(loop) {
957                 int d = get_loop_depth(loop);
958                 return 1 + d * d;
959         }
960         return 1;
961 #endif
962 }
963
964 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
965 {
966         appel_block_info_t *res = NULL;
967         (void) old;
968
969         if(is_Block(irn)) {
970                 appel_clique_walker_t *d = (void *) phase;
971                 res = phase_alloc(phase, sizeof(res[0]));
972                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
973                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
974                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
975                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
976                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
977                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
978         }
979
980         return res;
981 }
982
983 typedef struct _insn_list_t {
984         be_insn_t *insn;
985         struct list_head list;
986 } insn_list_t;
987
988 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
989 {
990         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
991         int i;
992
993         for(i = 0; i < bli->n_live_end; ++i)
994                 if(bli->live_end[i] == irn)
995                         return bli->live_end_nr[i];
996
997         return -1;
998 }
999
1000 static int appel_dump_clique(appel_clique_walker_t *env, const ir_nodeset_t *live, ir_node *bl, int curr_nr, int start_nr)
1001 {
1002         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
1003         ir_node *irn;
1004         int n_live;
1005         int j;
1006         ir_nodeset_iterator_t iter;
1007
1008         n_live = 0;
1009         foreach_ir_nodeset(live, irn, iter)
1010                 live_arr[n_live++] = irn;
1011
1012         /* dump the live after clique */
1013         if(!env->dumb) {
1014                 for(j = 0; j < n_live; ++j) {
1015                         int k;
1016
1017                         for(k = j + 1; k < n_live; ++k) {
1018                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
1019                         }
1020                         fprintf(env->f, "\n");
1021                 }
1022         }
1023
1024         /* dump the affinities */
1025         for(j = 0; !env->dumb && j < n_live; ++j) {
1026                 ir_node *irn = live_arr[j];
1027                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1028
1029                 /* if the node was already live in the last insn dump the affinity */
1030                 if(old_nr > start_nr) {
1031                         int weight = appel_aff_weight(env, bl);
1032                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1033                 }
1034         }
1035
1036         /* set the current numbers into the link field. */
1037         for(j = 0; j < n_live; ++j) {
1038                 ir_node *irn = live_arr[j];
1039                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1040         }
1041
1042         return curr_nr + n_live;
1043 }
1044
1045 static void appel_walker(ir_node *bl, void *data)
1046 {
1047         appel_clique_walker_t *env = data;
1048         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1049         struct obstack *obst       = &env->obst;
1050         void *base                 = obstack_base(obst);
1051         ir_nodeset_t live;
1052         ir_nodeset_iterator_t iter;
1053         be_lv_t *lv                = env->co->cenv->birg->lv;
1054
1055         int n_insns  = 0;
1056         int n_nodes  = 0;
1057         int start_nr = env->curr_nr;
1058         int curr_nr  = start_nr;
1059
1060         be_insn_env_t insn_env;
1061         int i, j;
1062         ir_node *irn;
1063         be_insn_t **insns;
1064
1065         insn_env.aenv = env->co->aenv;
1066         insn_env.cls  = env->co->cls;
1067         insn_env.obst = obst;
1068         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1069
1070         /* Guess how many insns will be in this block. */
1071         sched_foreach(bl, irn)
1072                 n_nodes++;
1073
1074         bli->n_phi = 0;
1075         insns = xmalloc(n_nodes * sizeof(insns[0]));
1076
1077         /* Put all insns in an array. */
1078         irn = sched_first(bl);
1079         while(!sched_is_end(irn)) {
1080                 be_insn_t *insn;
1081                 insn = be_scan_insn(&insn_env, irn);
1082                 insns[n_insns++] = insn;
1083                 irn = insn->next_insn;
1084         }
1085
1086         DBG((dbg, LEVEL_2, "%+F\n", bl));
1087         ir_nodeset_init(&live);
1088         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, &live);
1089
1090         /* Generate the bad and ugly. */
1091         for(i = n_insns - 1; i >= 0; --i) {
1092                 be_insn_t *insn = insns[i];
1093
1094                 /* The first live set has to be saved in the block border set. */
1095                 if(i == n_insns - 1) {
1096                         j = 0;
1097                         foreach_ir_nodeset(&live, irn, iter) {
1098                                 bli->live_end[j]    = irn;
1099                                 bli->live_end_nr[j] = curr_nr + j;
1100                                 ++j;
1101                         }
1102                         bli->n_live_end = j;
1103                 }
1104
1105                 if(!env->dumb) {
1106                         for(j = 0; j < insn->use_start; ++j) {
1107                                 ir_node *op   = insn->ops[j].carrier;
1108                                 bitset_t *adm = insn->ops[j].regs;
1109                                 unsigned k;
1110                                 size_t nr;
1111
1112                                 if(!insn->ops[j].has_constraints)
1113                                         continue;
1114
1115                                 nr = 0;
1116                                 foreach_ir_nodeset(&live, irn, iter) {
1117                                         if(irn == op) {
1118                                                 break;
1119                                         }
1120                                         ++nr;
1121                                 }
1122
1123                                 assert(nr < ir_nodeset_size(&live));
1124
1125                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1126                                         int mapped_col = env->color_map[k];
1127                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1128                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1129                                 }
1130                         }
1131                 }
1132
1133                 /* dump the clique and update the stuff. */
1134                 curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1135
1136                 /* remove all defs. */
1137                 for(j = 0; j < insn->use_start; ++j)
1138                         ir_nodeset_remove(&live, insn->ops[j].carrier);
1139
1140                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1141                         bli->phi[bli->n_phi]    = insn->irn;
1142                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1143                         bli->n_phi++;
1144                 }
1145
1146                 /* add all uses */
1147                 else
1148                         for(j = insn->use_start; j < insn->n_ops; ++j)
1149                                 ir_nodeset_insert(&live, insn->ops[j].carrier);
1150         }
1151
1152         /* print the start clique. */
1153         curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1154
1155         i = 0;
1156         foreach_ir_nodeset(&live, irn, iter) {
1157                 bli->live_in[i]    = irn;
1158                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1159                 ++i;
1160         }
1161         bli->n_live_in = i;
1162
1163         ir_nodeset_destroy(&live);
1164         free(insns);
1165         obstack_free(obst, base);
1166         env->curr_nr = curr_nr;
1167 }
1168
1169 static void appel_inter_block_aff(ir_node *bl, void *data)
1170 {
1171         appel_clique_walker_t *env = data;
1172         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1173
1174         int i, j, n;
1175
1176         for(i = 0; i < bli->n_live_in; ++i) {
1177                 ir_node *irn = bli->live_in[i];
1178
1179                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1180                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1181
1182                         int nr = appel_get_live_end_nr(env, pred, irn);
1183                         assert(nr >= 0);
1184                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1185                 }
1186         }
1187
1188         for(i = 0; i < bli->n_phi; ++i) {
1189                 ir_node *irn = bli->phi[i];
1190
1191                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1192                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1193                         ir_node *op = get_irn_n(irn, j);
1194
1195                         int nr = appel_get_live_end_nr(env, pred, op);
1196                         assert(nr >= 0);
1197                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1198                 }
1199         }
1200
1201 }
1202
1203 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1204 {
1205         unsigned i;
1206         unsigned n_colors;
1207         appel_clique_walker_t env;
1208         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1209         be_lv_t *lv = co->cenv->birg->lv;
1210
1211         be_liveness_recompute(lv);
1212         obstack_init(&env.obst);
1213         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1214         env.curr_nr = co->cls->n_regs;
1215         env.co = co;
1216         env.f = f;
1217
1218         bitset_copy(adm, co->cenv->ignore_colors);
1219         bitset_flip_all(adm);
1220
1221         /* Make color map. */
1222         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1223         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1224                 const arch_register_t *reg = &co->cls->regs[i];
1225                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : (int) n_colors++;
1226         }
1227
1228         env.dumb = 1;
1229         env.curr_nr = n_colors;
1230         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1231         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1232
1233         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1234
1235         /* make the first k nodes interfere */
1236         for(i = 0; i < n_colors; ++i) {
1237                 unsigned j;
1238                 for(j = i + 1; j < n_colors; ++j)
1239                         fprintf(f, "%d %d -1 ", i, j);
1240                 fprintf(f, "\n");
1241         }
1242
1243         env.dumb = 0;
1244         env.curr_nr = n_colors;
1245         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1246         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1247         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1248         obstack_free(&env.obst, NULL);
1249 }
1250
1251 /*
1252          ___ _____ ____   ____   ___ _____   ____                        _
1253         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1254          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1255          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1256         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1257                                                                   |_|            |___/
1258 */
1259
1260 static const char *get_dot_color_name(size_t col)
1261 {
1262         static const char *names[] = {
1263                 "blue",
1264                 "red",
1265                 "green",
1266                 "yellow",
1267                 "cyan",
1268                 "magenta",
1269                 "orange",
1270                 "chocolate",
1271                 "beige",
1272                 "navy",
1273                 "darkgreen",
1274                 "darkred",
1275                 "lightPink",
1276                 "chartreuse",
1277                 "lightskyblue",
1278                 "linen",
1279                 "pink",
1280                 "lightslateblue",
1281                 "mintcream",
1282                 "red",
1283                 "darkolivegreen",
1284                 "mediumblue",
1285                 "mistyrose",
1286                 "salmon",
1287                 "darkseagreen",
1288                 "mediumslateblue"
1289                 "moccasin",
1290                 "tomato",
1291                 "forestgreen",
1292                 "darkturquoise",
1293                 "palevioletred"
1294         };
1295
1296         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1297 }
1298
1299 typedef struct _co_ifg_dump_t {
1300         const copy_opt_t *co;
1301         unsigned flags;
1302 } co_ifg_dump_t;
1303
1304 static void ifg_dump_graph_attr(FILE *f, void *self)
1305 {
1306         (void) self;
1307         fprintf(f, "overlap=scale");
1308 }
1309
1310 static int ifg_is_dump_node(void *self, ir_node *irn)
1311 {
1312         co_ifg_dump_t *cod = self;
1313         return !arch_irn_is(cod->co->aenv, irn, ignore);
1314 }
1315
1316 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1317 {
1318         co_ifg_dump_t *env         = self;
1319         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1320         const arch_register_req_t *req;
1321         int limited;
1322
1323         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1324         limited = arch_register_req_is(req, limited);
1325
1326         if(env->flags & CO_IFG_DUMP_LABELS) {
1327                 ir_fprintf(f, "label=\"%+F", irn);
1328
1329 #if 0
1330                 // TODO fix this...
1331                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1332                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1333                         req.limited(req.limited_env, bs);
1334                         ir_fprintf(f, "\\n%B", bs);
1335                 }
1336 #endif
1337                 ir_fprintf(f, "\" ");
1338         } else {
1339                 fprintf(f, "label=\"\" shape=point " );
1340         }
1341
1342         if(env->flags & CO_IFG_DUMP_SHAPE)
1343                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1344
1345         if(env->flags & CO_IFG_DUMP_COLORS)
1346                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1347 }
1348
1349 static void ifg_dump_at_end(FILE *file, void *self)
1350 {
1351         co_ifg_dump_t *env = self;
1352         affinity_node_t *a;
1353
1354         co_gs_foreach_aff_node(env->co, a) {
1355                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1356                 unsigned aidx = get_irn_idx(a->irn);
1357                 neighb_t *n;
1358
1359                 co_gs_foreach_neighb(a, n) {
1360                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1361                         unsigned nidx = get_irn_idx(n->irn);
1362
1363                         if(aidx < nidx) {
1364                                 const char *color = nr == ar ? "blue" : "red";
1365                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1366                                 if(env->flags & CO_IFG_DUMP_LABELS)
1367                                         fprintf(file, "label=\"%d\" ", n->costs);
1368                                 if(env->flags & CO_IFG_DUMP_COLORS)
1369                                         fprintf(file, "color=%s ", color);
1370                                 else
1371                                         fprintf(file, "style=dotted");
1372                                 fprintf(file, "];\n");
1373                         }
1374                 }
1375         }
1376 }
1377
1378
1379 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1380         ifg_is_dump_node,
1381         ifg_dump_graph_attr,
1382         ifg_dump_node_attr,
1383         NULL,
1384         NULL,
1385         ifg_dump_at_end
1386 };
1387
1388
1389
1390 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1391 {
1392         co_ifg_dump_t cod;
1393
1394         cod.co    = co;
1395         cod.flags = flags;
1396         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1397 }
1398
1399
1400 void co_solve_park_moon(copy_opt_t *opt)
1401 {
1402         (void) opt;
1403 }
1404
1405 static int void_algo(copy_opt_t *co)
1406 {
1407         (void) co;
1408         return 0;
1409 }
1410
1411 /*
1412                 _    _                  _ _   _
1413            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1414           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1415          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1416         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1417                            |___/
1418 */
1419
1420 typedef struct {
1421         co_algo_t  *algo;
1422         const char *name;
1423         int        can_improve_existing;
1424 } co_algo_info_t;
1425
1426 static co_algo_info_t algos[] = {
1427         { void_algo,               "none",  0 },
1428         { co_solve_heuristic,      "heur1", 0 },
1429         { co_solve_heuristic_new,  "heur2", 0 },
1430         { co_solve_heuristic_java, "heur3", 0 },
1431         { co_solve_heuristic_mst,  "heur4", 0 },
1432 #ifdef WITH_ILP
1433         { co_solve_ilp2,           "ilp",   1 },
1434 #endif
1435         { NULL,                    "",      0 }
1436 };
1437
1438 /*
1439     __  __       _         ____       _
1440    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1441    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1442    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1443    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1444
1445 */
1446
1447 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1448 {
1449         FILE *result;
1450         char buf[1024];
1451
1452         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1453         result = fopen(buf, "wt");
1454         if(result == NULL) {
1455                 panic("Couldn't open '%s' for writing.", buf);
1456         }
1457
1458         return result;
1459 }
1460
1461 void co_driver(be_chordal_env_t *cenv)
1462 {
1463         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1464         co_complete_stats_t before, after;
1465         copy_opt_t          *co;
1466         co_algo_t           *algo_func;
1467         int                 was_optimal = 0;
1468
1469         if (algo >= CO_ALGO_LAST)
1470                 return;
1471
1472         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1473
1474         co = new_copy_opt(cenv, cost_func);
1475         co_build_ou_structure(co);
1476         co_build_graph_structure(co);
1477
1478         co_complete_stats(co, &before);
1479
1480         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1481         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1482         be_stat_ev_ull("co_max_costs",    before.max_costs);
1483         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1484         be_stat_ev_ull("co_aff_int",      before.aff_int);
1485
1486         be_stat_ev_ull("co_init_costs",   before.costs);
1487         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1488
1489         /* Dump the interference graph in Appel's format. */
1490         if (dump_flags & DUMP_APPEL) {
1491                 FILE *f = my_open(cenv, "", ".apl");
1492                 co_dump_appel_graph(co, f);
1493                 fclose(f);
1494         }
1495
1496         if (dump_flags & DUMP_BEFORE) {
1497                 FILE *f = my_open(cenv, "", "-before.dot");
1498                 co_dump_ifg_dot(co, f, style_flags);
1499                 fclose(f);
1500         }
1501
1502         /* if the algo can improve results, provide an initial solution with heur3 */
1503         if (improve && algos[algo].can_improve_existing) {
1504                 co_complete_stats_t stats;
1505
1506                 /* produce a heuristic solution */
1507 #ifdef WITH_JVM
1508                 co_solve_heuristic_java(co);
1509 #else
1510                 co_solve_heuristic(co);
1511 #endif /* WITH_JVM */
1512
1513                 /* do the stats and provide the current costs */
1514                 co_complete_stats(co, &stats);
1515                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1516         }
1517
1518 #ifdef WITH_JVM
1519         /* start the JVM here so that it does not tamper the timing. */
1520         if (algo == CO_ALGO_HEUR3)
1521                 be_java_coal_start_jvm();
1522 #endif /* WITH_JVM */
1523
1524         algo_func = algos[algo].algo;
1525
1526         /* perform actual copy minimization */
1527         lc_timer_reset_and_start(timer);
1528         was_optimal = algo_func(co);
1529         lc_timer_stop(timer);
1530
1531         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1532         be_stat_ev_ull("co_optimal", was_optimal);
1533
1534         if (dump_flags & DUMP_AFTER) {
1535                 FILE *f = my_open(cenv, "", "-after.dot");
1536                 co_dump_ifg_dot(co, f, style_flags);
1537                 fclose(f);
1538         }
1539
1540         co_complete_stats(co, &after);
1541
1542         if (do_stats) {
1543                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1544                 ulong64 evitable          = after.costs     - after.inevit_costs;
1545
1546                 ir_printf("%30F ", cenv->irg);
1547                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1548
1549                 if(optimizable_costs > 0)
1550                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1551                 else
1552                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1553         }
1554
1555         be_stat_ev_ull("co_after_costs", after.costs);
1556         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1557
1558         co_free_graph_structure(co);
1559         co_free_ou_structure(co);
1560         free_copy_opt(co);
1561 }