report nextuse place in beuses, add a function which reports normal reload costs...
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110         { "heur3",  CO_ALGO_HEUR3 },
111         { "heur4",  CO_ALGO_HEUR4 },
112         { "ilp",    CO_ALGO_ILP   },
113         { NULL,     0 }
114 };
115
116 typedef int (*opt_funcptr)(void);
117
118 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
119         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
120         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
121         { "one",    (opt_funcptr) co_get_costs_all_one },
122         { NULL,     NULL }
123 };
124
125 static lc_opt_enum_mask_var_t dump_var = {
126         &dump_flags, dump_items
127 };
128
129 static lc_opt_enum_mask_var_t style_var = {
130         &style_flags, style_items
131 };
132
133 static lc_opt_enum_mask_var_t algo_var = {
134         &algo, algo_items
135 };
136
137 static lc_opt_enum_func_ptr_var_t cost_func_var = {
138         (opt_funcptr*) &cost_func, cost_func_items
139 };
140
141 static const lc_opt_table_entry_t options[] = {
142         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
143         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
144         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
145         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
146         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
147         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
148         LC_OPT_LAST
149 };
150
151 /* Insert additional options registration functions here. */
152 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
153 extern void be_co2_register_options(lc_opt_entry_t *grp);
154 extern void be_co3_register_options(lc_opt_entry_t *grp);
155
156 void be_init_copycoal(void)
157 {
158         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
159         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
160         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
161         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
162
163         lc_opt_add_table(co_grp, options);
164 }
165
166 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
167
168 #undef QUICK_AND_DIRTY_HACK
169
170 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
171 {
172         if (env->ifg)
173                 return be_ifg_connected(env->ifg, a, b);
174         else
175                 return values_interfere(env->birg, a, b);
176 }
177
178
179 /******************************************************************************
180     _____                           _
181    / ____|                         | |
182   | |  __  ___ _ __   ___ _ __ __ _| |
183   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
184   | |__| |  __/ | | |  __/ | | (_| | |
185    \_____|\___|_| |_|\___|_|  \__,_|_|
186
187  ******************************************************************************/
188
189 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
190
191
192 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
193 {
194         const char *s1, *s2, *s3;
195         int len;
196         copy_opt_t *co;
197
198         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
199
200         co = xcalloc(1, sizeof(*co));
201         co->cenv      = chordal_env;
202         co->aenv      = chordal_env->birg->main_env->arch_env;
203         co->irg       = chordal_env->irg;
204         co->cls       = chordal_env->cls;
205         co->get_costs = get_costs;
206
207         s1 = get_irp_prog_name();
208         s2 = get_entity_name(get_irg_entity(co->irg));
209         s3 = chordal_env->cls->name;
210         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
211         co->name = xmalloc(len);
212         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
213
214         return co;
215 }
216
217 void free_copy_opt(copy_opt_t *co) {
218         xfree(co->name);
219         free(co);
220 }
221
222 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
223         const arch_register_req_t *req;
224         const arch_register_t *reg;
225
226         if (arch_irn_is(co->aenv, irn, ignore))
227                 return 0;
228
229         reg = arch_get_irn_register(co->aenv, irn);
230         if (arch_register_type_is(reg, ignore))
231                 return 0;
232
233         req = arch_get_register_req(co->aenv, irn, -1);
234         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
235                 return 1;
236
237         return 0;
238 }
239
240 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
241         int cost = 0;
242         ir_loop *loop;
243         ir_node *root_block = get_nodes_block(root);
244         (void) co;
245         (void) arg;
246
247         if (is_Phi(root)) {
248                 /* for phis the copies are placed in the corresponding pred-block */
249                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
250         } else {
251                 /* a perm places the copy in the same block as it resides */
252                 loop = get_irn_loop(root_block);
253         }
254         if (loop) {
255                 int d = get_loop_depth(loop);
256                 cost = d*d;
257         }
258         return 1+cost;
259 }
260
261 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
262         int res;
263         ir_node *root_bl = get_nodes_block(root);
264         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
265         (void) arg;
266         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
267
268         /* don't allow values smaller than one. */
269         return res < 1 ? 1 : res;
270 }
271
272
273 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
274         (void) co;
275         (void) root;
276         (void) arg;
277         (void) pos;
278         return 1;
279 }
280
281 /******************************************************************************
282    ____        _   _    _       _ _          _____ _
283   / __ \      | | | |  | |     (_) |        / ____| |
284  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
285  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
286  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
287   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
288         | |                                                            __/ |
289         |_|                                                           |___/
290  ******************************************************************************/
291
292 /**
293  * Determines a maximum weighted independent set with respect to
294  * the interference and conflict edges of all nodes in a qnode.
295  */
296 static int ou_max_ind_set_costs(unit_t *ou) {
297         be_chordal_env_t *chordal_env = ou->co->cenv;
298         ir_node **safe, **unsafe;
299         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
300         bitset_t *curr;
301         bitset_pos_t pos;
302         int max, curr_weight, best_weight = 0;
303
304         /* assign the nodes into two groups.
305          * safe: node has no interference, hence it is in every max stable set.
306          * unsafe: node has an interference
307          */
308         safe = alloca((ou->node_count-1) * sizeof(*safe));
309         safe_costs = 0;
310         safe_count = 0;
311         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
312         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
313         unsafe_count = 0;
314         for(i=1; i<ou->node_count; ++i) {
315                 int is_safe = 1;
316                 for(o=1; o<ou->node_count; ++o) {
317                         if (i==o)
318                                 continue;
319                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
320                                 unsafe_costs[unsafe_count] = ou->costs[i];
321                                 unsafe[unsafe_count] = ou->nodes[i];
322                                 ++unsafe_count;
323                                 is_safe = 0;
324                                 break;
325                         }
326                 }
327                 if (is_safe) {
328                         safe_costs += ou->costs[i];
329                         safe[safe_count++] = ou->nodes[i];
330                 }
331         }
332
333
334         /* now compute the best set out of the unsafe nodes*/
335         if (unsafe_count > MIS_HEUR_TRIGGER) {
336                 bitset_t *best = bitset_alloca(unsafe_count);
337                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
338                 for (i=0; i<unsafe_count; ++i) {
339                         bitset_set(best, i);
340                         /* check if it is a stable set */
341                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
342                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
343                                         bitset_clear(best, i); /* clear the bit and try next one */
344                                         break;
345                                 }
346                 }
347                 /* compute the weight */
348                 bitset_foreach(best, pos)
349                         best_weight += unsafe_costs[pos];
350         } else {
351                 /* Exact Algorithm: Brute force */
352                 curr = bitset_alloca(unsafe_count);
353                 bitset_set_all(curr);
354                 while ((max = bitset_popcnt(curr)) != 0) {
355                         /* check if curr is a stable set */
356                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
357                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
358                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
359                                                         goto no_stable_set;
360
361                         /* if we arrive here, we have a stable set */
362                         /* compute the weigth of the stable set*/
363                         curr_weight = 0;
364                         bitset_foreach(curr, pos)
365                                 curr_weight += unsafe_costs[pos];
366
367                         /* any better ? */
368                         if (curr_weight > best_weight) {
369                                 best_weight = curr_weight;
370                         }
371
372         no_stable_set:
373                         bitset_minus1(curr);
374                 }
375         }
376
377         return safe_costs+best_weight;
378 }
379
380 static void co_collect_units(ir_node *irn, void *env) {
381         copy_opt_t *co = env;
382         unit_t *unit;
383
384         if (!is_curr_reg_class(co, irn))
385                 return;
386         if (!co_is_optimizable_root(co, irn))
387                 return;
388
389         /* Init a new unit */
390         unit = xcalloc(1, sizeof(*unit));
391         unit->co = co;
392         unit->node_count = 1;
393         INIT_LIST_HEAD(&unit->queue);
394
395         /* Phi with some/all of its arguments */
396         if (is_Reg_Phi(irn)) {
397                 int i, arity;
398
399                 /* init */
400                 arity = get_irn_arity(irn);
401                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
402                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
403                 unit->nodes[0] = irn;
404
405                 /* fill */
406                 for (i=0; i<arity; ++i) {
407                         int o, arg_pos;
408                         ir_node *arg = get_irn_n(irn, i);
409
410                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
411                         if (arg == irn)
412                                 continue;
413                         if (nodes_interfere(co->cenv, irn, arg)) {
414                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
415                                 continue;
416                         }
417
418                         /* Else insert the argument of the phi to the members of this ou */
419                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
420
421                         if (! arch_irn_is(co->aenv, arg, ignore)) {
422                                 /* Check if arg has occurred at a prior position in the arg/list */
423                                 arg_pos = 0;
424                                 for (o=1; o<unit->node_count; ++o) {
425                                         if (unit->nodes[o] == arg) {
426                                                 arg_pos = o;
427                                                 break;
428                                         }
429                                 }
430
431                                 if (!arg_pos) { /* a new argument */
432                                         /* insert node, set costs */
433                                         unit->nodes[unit->node_count] = arg;
434                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
435                                         unit->node_count++;
436                                 } else { /* arg has occurred before in same phi */
437                                         /* increase costs for existing arg */
438                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
439                                 }
440                         }
441                 }
442                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
443                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
444         } else if (is_Perm_Proj(co->aenv, irn)) {
445                 /* Proj of a perm with corresponding arg */
446                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
447                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
448                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
449                 unit->node_count = 2;
450                 unit->nodes[0] = irn;
451                 unit->nodes[1] = get_Perm_src(irn);
452                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
453         } else {
454                 const arch_register_req_t *req =
455                         arch_get_register_req(co->aenv, irn, -1);
456
457                 /* Src == Tgt of a 2-addr-code instruction */
458                 if (is_2addr_code(req)) {
459                         const unsigned other = req->other_same;
460                         int            count = 0;
461                         int            i;
462
463                         for (i = 0; (1U << i) <= other; ++i) {
464                                 if (other & (1U << i)) {
465                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
466                                         if (!arch_irn_is(co->aenv, o, ignore) &&
467                                                         !nodes_interfere(co->cenv, irn, o)) {
468                                                 ++count;
469                                         }
470                                 }
471                         }
472
473                         if (count != 0) {
474                                 int k = 0;
475                                 ++count;
476                                 unit->nodes = xmalloc(count * sizeof(*unit->nodes));
477                                 unit->costs = xmalloc(count * sizeof(*unit->costs));
478                                 unit->node_count = count;
479                                 unit->nodes[k++] = irn;
480
481                                 for (i = 0; 1U << i <= other; ++i) {
482                                         if (other & (1U << i)) {
483                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
484                                                 if (!arch_irn_is(co->aenv, o, ignore) &&
485                                                                 !nodes_interfere(co->cenv, irn, o)) {
486                                                         unit->nodes[k] = o;
487                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
488                                                         ++k;
489                                                 }
490                                         }
491                                 }
492                         }
493                 } else {
494                         assert(0 && "This is not an optimizable node!");
495                 }
496         }
497
498         /* Insert the new unit at a position according to its costs */
499         if (unit->node_count > 1) {
500                 int i;
501                 struct list_head *tmp;
502
503                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
504                 for(i=1; i<unit->node_count; ++i) {
505                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
506                         unit->all_nodes_costs += unit->costs[i];
507                 }
508
509                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
510                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
511                 /* Insert the new ou according to its sort_key */
512                 tmp = &co->units;
513                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
514                         tmp = tmp->next;
515                 list_add(&unit->units, tmp);
516         } else {
517                 free(unit);
518         }
519 }
520
521 #ifdef QUICK_AND_DIRTY_HACK
522
523 static int compare_ous(const void *k1, const void *k2) {
524         const unit_t *u1 = *((const unit_t **) k1);
525         const unit_t *u2 = *((const unit_t **) k2);
526         int i, o, u1_has_constr, u2_has_constr;
527         arch_register_req_t req;
528         const arch_env_t *aenv = u1->co->aenv;
529
530         /* Units with constraints come first */
531         u1_has_constr = 0;
532         for (i=0; i<u1->node_count; ++i) {
533                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
534                 if (arch_register_req_is(&req, limited)) {
535                         u1_has_constr = 1;
536                         break;
537                 }
538         }
539
540         u2_has_constr = 0;
541         for (i=0; i<u2->node_count; ++i) {
542                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
543                 if (arch_register_req_is(&req, limited)) {
544                         u2_has_constr = 1;
545                         break;
546                 }
547         }
548
549         if (u1_has_constr != u2_has_constr)
550                 return u2_has_constr - u1_has_constr;
551
552         /* Now check, whether the two units are connected */
553 #if 0
554         for (i=0; i<u1->node_count; ++i)
555                 for (o=0; o<u2->node_count; ++o)
556                         if (u1->nodes[i] == u2->nodes[o])
557                                 return 0;
558 #endif
559
560         /* After all, the sort key decides. Greater keys come first. */
561         return u2->sort_key - u1->sort_key;
562
563 }
564
565 /**
566  * Sort the ou's according to constraints and their sort_key
567  */
568 static void co_sort_units(copy_opt_t *co) {
569         int i, count = 0, costs;
570         unit_t *ou, **ous;
571
572         /* get the number of ous, remove them form the list and fill the array */
573         list_for_each_entry(unit_t, ou, &co->units, units)
574                 count++;
575         ous = alloca(count * sizeof(*ous));
576
577         costs = co_get_max_copy_costs(co);
578
579         i = 0;
580         list_for_each_entry(unit_t, ou, &co->units, units)
581                 ous[i++] = ou;
582
583         INIT_LIST_HEAD(&co->units);
584
585         assert(count == i && list_empty(&co->units));
586
587         for (i=0; i<count; ++i)
588                 ir_printf("%+F\n", ous[i]->nodes[0]);
589
590         qsort(ous, count, sizeof(*ous), compare_ous);
591
592         ir_printf("\n\n");
593         for (i=0; i<count; ++i)
594                 ir_printf("%+F\n", ous[i]->nodes[0]);
595
596         /* reinsert into list in correct order */
597         for (i=0; i<count; ++i)
598                 list_add_tail(&ous[i]->units, &co->units);
599
600         assert(costs == co_get_max_copy_costs(co));
601 }
602 #endif
603
604 void co_build_ou_structure(copy_opt_t *co) {
605         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
606         INIT_LIST_HEAD(&co->units);
607         irg_walk_graph(co->irg, co_collect_units, NULL, co);
608 #ifdef QUICK_AND_DIRTY_HACK
609         co_sort_units(co);
610 #endif
611 }
612
613 void co_free_ou_structure(copy_opt_t *co) {
614         unit_t *curr, *tmp;
615         ASSERT_OU_AVAIL(co);
616         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
617                 xfree(curr->nodes);
618                 xfree(curr->costs);
619                 xfree(curr);
620         }
621         co->units.next = NULL;
622 }
623
624 /* co_solve_heuristic() is implemented in becopyheur.c */
625
626 int co_get_max_copy_costs(const copy_opt_t *co) {
627         int i, res = 0;
628         unit_t *curr;
629
630         ASSERT_OU_AVAIL(co);
631
632         list_for_each_entry(unit_t, curr, &co->units, units) {
633                 res += curr->inevitable_costs;
634                 for (i=1; i<curr->node_count; ++i)
635                         res += curr->costs[i];
636         }
637         return res;
638 }
639
640 int co_get_inevit_copy_costs(const copy_opt_t *co) {
641         int res = 0;
642         unit_t *curr;
643
644         ASSERT_OU_AVAIL(co);
645
646         list_for_each_entry(unit_t, curr, &co->units, units)
647                 res += curr->inevitable_costs;
648         return res;
649 }
650
651 int co_get_copy_costs(const copy_opt_t *co) {
652         int i, res = 0;
653         unit_t *curr;
654
655         ASSERT_OU_AVAIL(co);
656
657         list_for_each_entry(unit_t, curr, &co->units, units) {
658                 int root_col = get_irn_col(co, curr->nodes[0]);
659                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
660                 res += curr->inevitable_costs;
661                 for (i=1; i<curr->node_count; ++i) {
662                         int arg_col = get_irn_col(co, curr->nodes[i]);
663                         if (root_col != arg_col) {
664                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
665                                 res += curr->costs[i];
666                         }
667                 }
668         }
669         return res;
670 }
671
672 int co_get_lower_bound(const copy_opt_t *co) {
673         int res = 0;
674         unit_t *curr;
675
676         ASSERT_OU_AVAIL(co);
677
678         list_for_each_entry(unit_t, curr, &co->units, units)
679                 res += curr->inevitable_costs + curr->min_nodes_costs;
680         return res;
681 }
682
683 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
684 {
685         bitset_t *seen = bitset_irg_malloc(co->irg);
686         affinity_node_t *an;
687
688         memset(stat, 0, sizeof(stat[0]));
689
690         /* count affinity edges. */
691         co_gs_foreach_aff_node(co, an) {
692                 neighb_t *neigh;
693                 stat->aff_nodes += 1;
694                 bitset_add_irn(seen, an->irn);
695                 co_gs_foreach_neighb(an, neigh) {
696                         if(!bitset_contains_irn(seen, neigh->irn)) {
697                                 stat->aff_edges += 1;
698                                 stat->max_costs += neigh->costs;
699
700                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
701                                         stat->costs += neigh->costs;
702                                         stat->unsatisfied_edges += 1;
703                                 }
704
705                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
706                                         stat->aff_int += 1;
707                                         stat->inevit_costs += neigh->costs;
708                                 }
709
710                         }
711                 }
712         }
713
714         bitset_free(seen);
715 }
716
717 /******************************************************************************
718    _____                 _        _____ _
719   / ____|               | |      / ____| |
720  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
721  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
722  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
723   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
724                   | |                                       __/ |
725                   |_|                                      |___/
726  ******************************************************************************/
727
728 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
729         const affinity_node_t *n1 = k1;
730         const affinity_node_t *n2 = k2;
731         (void) size;
732
733         return (n1->irn != n2->irn);
734 }
735
736 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
737         affinity_node_t new_node, *node;
738         neighb_t        *nbr;
739         int             allocnew = 1;
740
741         new_node.irn        = n1;
742         new_node.degree     = 0;
743         new_node.neighbours = NULL;
744         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
745
746         for (nbr = node->neighbours; nbr; nbr = nbr->next)
747                 if (nbr->irn == n2) {
748                         allocnew = 0;
749                         break;
750                 }
751
752         /* if we did not find n2 in n1's neighbourhood insert it */
753         if (allocnew) {
754                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
755                 nbr->irn   = n2;
756                 nbr->costs = 0;
757                 nbr->next  = node->neighbours;
758
759                 node->neighbours = nbr;
760                 node->degree++;
761         }
762
763         /* now nbr points to n1's neighbour-entry of n2 */
764         nbr->costs += costs;
765 }
766
767 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
768         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
769                 add_edge(co, n1, n2, costs);
770                 add_edge(co, n2, n1, costs);
771         }
772 }
773
774 static void build_graph_walker(ir_node *irn, void *env) {
775         copy_opt_t *co = env;
776         int pos, max;
777         const arch_register_t *reg;
778
779         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
780                 return;
781
782         reg = arch_get_irn_register(co->aenv, irn);
783         if (arch_register_type_is(reg, ignore))
784                 return;
785
786         if (is_Reg_Phi(irn)) { /* Phis */
787                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
788                         ir_node *arg = get_irn_n(irn, pos);
789                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
790                 }
791         }
792         else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
793                 ir_node *arg = get_Perm_src(irn);
794                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
795         }
796         else { /* 2-address code */
797                 const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
798                 if (is_2addr_code(req)) {
799                         const unsigned other = req->other_same;
800                         int i;
801
802                         for (i = 0; 1U << i <= other; ++i) {
803                                 if (other & (1U << i)) {
804                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
805                                         if (! arch_irn_is(co->aenv, other, ignore))
806                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
807                                 }
808                         }
809                 }
810         }
811 }
812
813 void co_build_graph_structure(copy_opt_t *co) {
814         obstack_init(&co->obst);
815         co->nodes = new_set(compare_affinity_node_t, 32);
816
817         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
818 }
819
820 void co_free_graph_structure(copy_opt_t *co) {
821         ASSERT_GS_AVAIL(co);
822
823         del_set(co->nodes);
824         obstack_free(&co->obst, NULL);
825         co->nodes = NULL;
826 }
827
828 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
829
830 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
831         affinity_node_t new_node, *n;
832
833         ASSERT_GS_AVAIL(co);
834
835         new_node.irn = irn;
836         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
837         if (n) {
838                 return (n->degree > 0);
839         } else
840                 return 0;
841 }
842
843 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
844 {
845         be_ifg_t *ifg  = co->cenv->ifg;
846         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
847
848         ir_node *irn;
849         void *it, *nit;
850         int n, n_regs;
851         unsigned i;
852
853         n_regs = 0;
854         for(i = 0; i < co->cls->n_regs; ++i) {
855                 const arch_register_t *reg = &co->cls->regs[i];
856                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
857         }
858
859         /*
860          * n contains the first node number.
861          * the values below n are the pre-colored register nodes
862          */
863
864         it  = be_ifg_nodes_iter_alloca(ifg);
865         nit = be_ifg_neighbours_iter_alloca(ifg);
866
867         n = n_regs;
868         be_ifg_foreach_node(ifg, it, irn) {
869                 if(!arch_irn_is(co->aenv, irn, ignore))
870                         set_irn_link(irn, INT_TO_PTR(n++));
871         }
872
873         fprintf(f, "%d %d\n", n, n_regs);
874
875         be_ifg_foreach_node(ifg, it, irn) {
876                 if(!arch_irn_is(co->aenv, irn, ignore)) {
877                         int idx            = PTR_TO_INT(get_irn_link(irn));
878                         affinity_node_t *a = get_affinity_info(co, irn);
879
880                         const arch_register_req_t *req;
881                         ir_node *adj;
882
883                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
884                         if(arch_register_req_is(req, limited)) {
885                                 for(i = 0; i < co->cls->n_regs; ++i) {
886                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
887                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
888                                 }
889                         }
890
891
892                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
893                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
894                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
895                                         if(idx < adj_idx)
896                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
897                                 }
898                         }
899
900                         if(a) {
901                                 neighb_t *n;
902
903                                 co_gs_foreach_neighb(a, n) {
904                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
905                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
906                                                 if(idx < n_idx)
907                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
908                                         }
909                                 }
910                         }
911                 }
912         }
913 }
914
915 typedef struct _appel_clique_walker_t {
916         ir_phase ph;
917         const copy_opt_t *co;
918         int curr_nr;
919         int node_count;
920         FILE *f;
921         int dumb;
922         int *color_map;
923         struct obstack obst;
924 } appel_clique_walker_t;
925
926 typedef struct _appel_block_info_t {
927         int *live_end_nr;
928         int *live_in_nr;
929         int *phi_nr;
930         ir_node **live_end;
931         ir_node **live_in;
932         ir_node **phi;
933         int n_live_end;
934         int n_live_in;
935         int n_phi;
936 } appel_block_info_t;
937
938 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
939 {
940 #if 0
941         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
942         int res = (int) freq;
943         return res == 0 ? 1 : res;
944 #else
945         ir_loop *loop = get_irn_loop(bl);
946         (void) env;
947         if(loop) {
948                 int d = get_loop_depth(loop);
949                 return 1 + d * d;
950         }
951         return 1;
952 #endif
953 }
954
955 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
956 {
957         appel_block_info_t *res = NULL;
958         (void) old;
959
960         if(is_Block(irn)) {
961                 appel_clique_walker_t *d = (void *) phase;
962                 res = phase_alloc(phase, sizeof(res[0]));
963                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
964                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
965                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
966                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
967                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
968                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
969         }
970
971         return res;
972 }
973
974 typedef struct _insn_list_t {
975         be_insn_t *insn;
976         struct list_head list;
977 } insn_list_t;
978
979 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
980 {
981         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
982         int i;
983
984         for(i = 0; i < bli->n_live_end; ++i)
985                 if(bli->live_end[i] == irn)
986                         return bli->live_end_nr[i];
987
988         return -1;
989 }
990
991 static int appel_dump_clique(appel_clique_walker_t *env, const ir_nodeset_t *live, ir_node *bl, int curr_nr, int start_nr)
992 {
993         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
994         ir_node *irn;
995         int n_live;
996         int j;
997         ir_nodeset_iterator_t iter;
998
999         n_live = 0;
1000         foreach_ir_nodeset(live, irn, iter)
1001                 live_arr[n_live++] = irn;
1002
1003         /* dump the live after clique */
1004         if(!env->dumb) {
1005                 for(j = 0; j < n_live; ++j) {
1006                         int k;
1007
1008                         for(k = j + 1; k < n_live; ++k) {
1009                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
1010                         }
1011                         fprintf(env->f, "\n");
1012                 }
1013         }
1014
1015         /* dump the affinities */
1016         for(j = 0; !env->dumb && j < n_live; ++j) {
1017                 ir_node *irn = live_arr[j];
1018                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1019
1020                 /* if the node was already live in the last insn dump the affinity */
1021                 if(old_nr > start_nr) {
1022                         int weight = appel_aff_weight(env, bl);
1023                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1024                 }
1025         }
1026
1027         /* set the current numbers into the link field. */
1028         for(j = 0; j < n_live; ++j) {
1029                 ir_node *irn = live_arr[j];
1030                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1031         }
1032
1033         return curr_nr + n_live;
1034 }
1035
1036 static void appel_walker(ir_node *bl, void *data)
1037 {
1038         appel_clique_walker_t *env = data;
1039         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1040         struct obstack *obst       = &env->obst;
1041         void *base                 = obstack_base(obst);
1042         ir_nodeset_t live;
1043         ir_nodeset_iterator_t iter;
1044         be_lv_t *lv                = env->co->cenv->birg->lv;
1045
1046         int n_insns  = 0;
1047         int n_nodes  = 0;
1048         int start_nr = env->curr_nr;
1049         int curr_nr  = start_nr;
1050
1051         be_insn_env_t insn_env;
1052         int i, j;
1053         ir_node *irn;
1054         be_insn_t **insns;
1055
1056         insn_env.aenv = env->co->aenv;
1057         insn_env.cls  = env->co->cls;
1058         insn_env.obst = obst;
1059         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1060
1061         /* Guess how many insns will be in this block. */
1062         sched_foreach(bl, irn)
1063                 n_nodes++;
1064
1065         bli->n_phi = 0;
1066         insns = xmalloc(n_nodes * sizeof(insns[0]));
1067
1068         /* Put all insns in an array. */
1069         irn = sched_first(bl);
1070         while(!sched_is_end(irn)) {
1071                 be_insn_t *insn;
1072                 insn = be_scan_insn(&insn_env, irn);
1073                 insns[n_insns++] = insn;
1074                 irn = insn->next_insn;
1075         }
1076
1077         DBG((dbg, LEVEL_2, "%+F\n", bl));
1078         ir_nodeset_init(&live);
1079         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, &live);
1080
1081         /* Generate the bad and ugly. */
1082         for(i = n_insns - 1; i >= 0; --i) {
1083                 be_insn_t *insn = insns[i];
1084
1085                 /* The first live set has to be saved in the block border set. */
1086                 if(i == n_insns - 1) {
1087                         j = 0;
1088                         foreach_ir_nodeset(&live, irn, iter) {
1089                                 bli->live_end[j]    = irn;
1090                                 bli->live_end_nr[j] = curr_nr + j;
1091                                 ++j;
1092                         }
1093                         bli->n_live_end = j;
1094                 }
1095
1096                 if(!env->dumb) {
1097                         for(j = 0; j < insn->use_start; ++j) {
1098                                 ir_node *op   = insn->ops[j].carrier;
1099                                 bitset_t *adm = insn->ops[j].regs;
1100                                 unsigned k;
1101                                 size_t nr;
1102
1103                                 if(!insn->ops[j].has_constraints)
1104                                         continue;
1105
1106                                 nr = 0;
1107                                 foreach_ir_nodeset(&live, irn, iter) {
1108                                         if(irn == op) {
1109                                                 break;
1110                                         }
1111                                         ++nr;
1112                                 }
1113
1114                                 assert(nr < ir_nodeset_size(&live));
1115
1116                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1117                                         int mapped_col = env->color_map[k];
1118                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1119                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1120                                 }
1121                         }
1122                 }
1123
1124                 /* dump the clique and update the stuff. */
1125                 curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1126
1127                 /* remove all defs. */
1128                 for(j = 0; j < insn->use_start; ++j)
1129                         ir_nodeset_remove(&live, insn->ops[j].carrier);
1130
1131                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1132                         bli->phi[bli->n_phi]    = insn->irn;
1133                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1134                         bli->n_phi++;
1135                 }
1136
1137                 /* add all uses */
1138                 else
1139                         for(j = insn->use_start; j < insn->n_ops; ++j)
1140                                 ir_nodeset_insert(&live, insn->ops[j].carrier);
1141         }
1142
1143         /* print the start clique. */
1144         curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1145
1146         i = 0;
1147         foreach_ir_nodeset(&live, irn, iter) {
1148                 bli->live_in[i]    = irn;
1149                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1150                 ++i;
1151         }
1152         bli->n_live_in = i;
1153
1154         ir_nodeset_destroy(&live);
1155         free(insns);
1156         obstack_free(obst, base);
1157         env->curr_nr = curr_nr;
1158 }
1159
1160 static void appel_inter_block_aff(ir_node *bl, void *data)
1161 {
1162         appel_clique_walker_t *env = data;
1163         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1164
1165         int i, j, n;
1166
1167         for(i = 0; i < bli->n_live_in; ++i) {
1168                 ir_node *irn = bli->live_in[i];
1169
1170                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1171                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1172
1173                         int nr = appel_get_live_end_nr(env, pred, irn);
1174                         assert(nr >= 0);
1175                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1176                 }
1177         }
1178
1179         for(i = 0; i < bli->n_phi; ++i) {
1180                 ir_node *irn = bli->phi[i];
1181
1182                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1183                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1184                         ir_node *op = get_irn_n(irn, j);
1185
1186                         int nr = appel_get_live_end_nr(env, pred, op);
1187                         assert(nr >= 0);
1188                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1189                 }
1190         }
1191
1192 }
1193
1194 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1195 {
1196         unsigned i;
1197         unsigned n_colors;
1198         appel_clique_walker_t env;
1199         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1200         be_lv_t *lv = co->cenv->birg->lv;
1201
1202         be_liveness_recompute(lv);
1203         obstack_init(&env.obst);
1204         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1205         env.curr_nr = co->cls->n_regs;
1206         env.co = co;
1207         env.f = f;
1208
1209         bitset_copy(adm, co->cenv->ignore_colors);
1210         bitset_flip_all(adm);
1211
1212         /* Make color map. */
1213         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1214         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1215                 const arch_register_t *reg = &co->cls->regs[i];
1216                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : (int) n_colors++;
1217         }
1218
1219         env.dumb = 1;
1220         env.curr_nr = n_colors;
1221         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1222         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1223
1224         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1225
1226         /* make the first k nodes interfere */
1227         for(i = 0; i < n_colors; ++i) {
1228                 unsigned j;
1229                 for(j = i + 1; j < n_colors; ++j)
1230                         fprintf(f, "%d %d -1 ", i, j);
1231                 fprintf(f, "\n");
1232         }
1233
1234         env.dumb = 0;
1235         env.curr_nr = n_colors;
1236         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1237         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1238         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1239         obstack_free(&env.obst, NULL);
1240 }
1241
1242 /*
1243          ___ _____ ____   ____   ___ _____   ____                        _
1244         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1245          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1246          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1247         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1248                                                                   |_|            |___/
1249 */
1250
1251 static const char *get_dot_color_name(size_t col)
1252 {
1253         static const char *names[] = {
1254                 "blue",
1255                 "red",
1256                 "green",
1257                 "yellow",
1258                 "cyan",
1259                 "magenta",
1260                 "orange",
1261                 "chocolate",
1262                 "beige",
1263                 "navy",
1264                 "darkgreen",
1265                 "darkred",
1266                 "lightPink",
1267                 "chartreuse",
1268                 "lightskyblue",
1269                 "linen",
1270                 "pink",
1271                 "lightslateblue",
1272                 "mintcream",
1273                 "red",
1274                 "darkolivegreen",
1275                 "mediumblue",
1276                 "mistyrose",
1277                 "salmon",
1278                 "darkseagreen",
1279                 "mediumslateblue"
1280                 "moccasin",
1281                 "tomato",
1282                 "forestgreen",
1283                 "darkturquoise",
1284                 "palevioletred"
1285         };
1286
1287         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1288 }
1289
1290 typedef struct _co_ifg_dump_t {
1291         const copy_opt_t *co;
1292         unsigned flags;
1293 } co_ifg_dump_t;
1294
1295 static void ifg_dump_graph_attr(FILE *f, void *self)
1296 {
1297         (void) self;
1298         fprintf(f, "overlap=scale");
1299 }
1300
1301 static int ifg_is_dump_node(void *self, ir_node *irn)
1302 {
1303         co_ifg_dump_t *cod = self;
1304         return !arch_irn_is(cod->co->aenv, irn, ignore);
1305 }
1306
1307 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1308 {
1309         co_ifg_dump_t *env         = self;
1310         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1311         const arch_register_req_t *req;
1312         int limited;
1313
1314         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1315         limited = arch_register_req_is(req, limited);
1316
1317         if(env->flags & CO_IFG_DUMP_LABELS) {
1318                 ir_fprintf(f, "label=\"%+F", irn);
1319
1320 #if 0
1321                 // TODO fix this...
1322                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1323                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1324                         req.limited(req.limited_env, bs);
1325                         ir_fprintf(f, "\\n%B", bs);
1326                 }
1327 #endif
1328                 ir_fprintf(f, "\" ");
1329         } else {
1330                 fprintf(f, "label=\"\" shape=point " );
1331         }
1332
1333         if(env->flags & CO_IFG_DUMP_SHAPE)
1334                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1335
1336         if(env->flags & CO_IFG_DUMP_COLORS)
1337                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1338 }
1339
1340 static void ifg_dump_at_end(FILE *file, void *self)
1341 {
1342         co_ifg_dump_t *env = self;
1343         affinity_node_t *a;
1344
1345         co_gs_foreach_aff_node(env->co, a) {
1346                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1347                 unsigned aidx = get_irn_idx(a->irn);
1348                 neighb_t *n;
1349
1350                 co_gs_foreach_neighb(a, n) {
1351                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1352                         unsigned nidx = get_irn_idx(n->irn);
1353
1354                         if(aidx < nidx) {
1355                                 const char *color = nr == ar ? "blue" : "red";
1356                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1357                                 if(env->flags & CO_IFG_DUMP_LABELS)
1358                                         fprintf(file, "label=\"%d\" ", n->costs);
1359                                 if(env->flags & CO_IFG_DUMP_COLORS)
1360                                         fprintf(file, "color=%s ", color);
1361                                 else
1362                                         fprintf(file, "style=dotted");
1363                                 fprintf(file, "];\n");
1364                         }
1365                 }
1366         }
1367 }
1368
1369
1370 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1371         ifg_is_dump_node,
1372         ifg_dump_graph_attr,
1373         ifg_dump_node_attr,
1374         NULL,
1375         NULL,
1376         ifg_dump_at_end
1377 };
1378
1379
1380
1381 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1382 {
1383         co_ifg_dump_t cod;
1384
1385         cod.co    = co;
1386         cod.flags = flags;
1387         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1388 }
1389
1390
1391 void co_solve_park_moon(copy_opt_t *opt)
1392 {
1393         (void) opt;
1394 }
1395
1396 static int void_algo(copy_opt_t *co)
1397 {
1398         (void) co;
1399         return 0;
1400 }
1401
1402 /*
1403                 _    _                  _ _   _
1404            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1405           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1406          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1407         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1408                            |___/
1409 */
1410
1411 typedef struct {
1412         co_algo_t  *algo;
1413         const char *name;
1414         int        can_improve_existing;
1415 } co_algo_info_t;
1416
1417 static co_algo_info_t algos[] = {
1418         { void_algo,               "none",  0 },
1419         { co_solve_heuristic,      "heur1", 0 },
1420         { co_solve_heuristic_new,  "heur2", 0 },
1421 #ifdef WITH_JVM
1422         { co_solve_heuristic_java, "heur3", 0 },
1423 #else
1424         { NULL,                    "heur3", 0 },
1425 #endif
1426         { co_solve_heuristic_mst,  "heur4", 0 },
1427 #ifdef WITH_ILP
1428         { co_solve_ilp2,           "ilp",   1 },
1429 #else
1430         { NULL,                    "ilp",   1 },
1431 #endif
1432         { NULL,                    "",      0 }
1433 };
1434
1435 /*
1436     __  __       _         ____       _
1437    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1438    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1439    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1440    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1441
1442 */
1443
1444 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1445 {
1446         FILE *result;
1447         char buf[1024];
1448
1449         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1450         result = fopen(buf, "wt");
1451         if(result == NULL) {
1452                 panic("Couldn't open '%s' for writing.", buf);
1453         }
1454
1455         return result;
1456 }
1457
1458 void co_driver(be_chordal_env_t *cenv)
1459 {
1460         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1461         co_complete_stats_t before, after;
1462         copy_opt_t          *co;
1463         co_algo_t           *algo_func;
1464         int                 was_optimal = 0;
1465
1466         if (algo >= CO_ALGO_LAST)
1467                 return;
1468
1469         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1470
1471         co = new_copy_opt(cenv, cost_func);
1472         co_build_ou_structure(co);
1473         co_build_graph_structure(co);
1474
1475         co_complete_stats(co, &before);
1476
1477         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1478         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1479         be_stat_ev_ull("co_max_costs",    before.max_costs);
1480         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1481         be_stat_ev_ull("co_aff_int",      before.aff_int);
1482
1483         be_stat_ev_ull("co_init_costs",   before.costs);
1484         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1485
1486         /* Dump the interference graph in Appel's format. */
1487         if (dump_flags & DUMP_APPEL) {
1488                 FILE *f = my_open(cenv, "", ".apl");
1489                 co_dump_appel_graph(co, f);
1490                 fclose(f);
1491         }
1492
1493         if (dump_flags & DUMP_BEFORE) {
1494                 FILE *f = my_open(cenv, "", "-before.dot");
1495                 co_dump_ifg_dot(co, f, style_flags);
1496                 fclose(f);
1497         }
1498
1499         /* if the algo can improve results, provide an initial solution with heur3 */
1500         if (improve && algos[algo].can_improve_existing) {
1501                 co_complete_stats_t stats;
1502
1503                 /* produce a heuristic solution */
1504 #ifdef WITH_JVM
1505                 co_solve_heuristic_java(co);
1506 #else
1507                 co_solve_heuristic(co);
1508 #endif /* WITH_JVM */
1509
1510                 /* do the stats and provide the current costs */
1511                 co_complete_stats(co, &stats);
1512                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1513         }
1514
1515 #ifdef WITH_JVM
1516         /* start the JVM here so that it does not tamper the timing. */
1517         if (algo == CO_ALGO_HEUR3)
1518                 be_java_coal_start_jvm();
1519 #endif /* WITH_JVM */
1520
1521         algo_func = algos[algo].algo;
1522
1523         /* perform actual copy minimization */
1524         lc_timer_reset_and_start(timer);
1525         was_optimal = algo_func(co);
1526         lc_timer_stop(timer);
1527
1528         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1529         be_stat_ev_ull("co_optimal", was_optimal);
1530
1531         if (dump_flags & DUMP_AFTER) {
1532                 FILE *f = my_open(cenv, "", "-after.dot");
1533                 co_dump_ifg_dot(co, f, style_flags);
1534                 fclose(f);
1535         }
1536
1537         co_complete_stats(co, &after);
1538
1539         if (do_stats) {
1540                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1541                 ulong64 evitable          = after.costs     - after.inevit_costs;
1542
1543                 ir_printf("%30F ", cenv->irg);
1544                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1545
1546                 if(optimizable_costs > 0)
1547                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1548                 else
1549                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1550         }
1551
1552         be_stat_ev_ull("co_after_costs", after.costs);
1553         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1554
1555         co_free_graph_structure(co);
1556         co_free_ou_structure(co);
1557         free_copy_opt(co);
1558 }