return 0;
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "bejavacoal.h"
64 #include "bestatevent.h"
65 #include "beirg_t.h"
66 #include "error.h"
67
68 #include <libcore/lc_timing.h>
69 #include <libcore/lc_opts.h>
70 #include <libcore/lc_opts_enum.h>
71
72 #define DUMP_BEFORE 1
73 #define DUMP_AFTER  2
74 #define DUMP_APPEL  4
75 #define DUMP_ALL    2 * DUMP_APPEL - 1
76
77 #define COST_FUNC_FREQ     1
78 #define COST_FUNC_LOOP     2
79 #define COST_FUNC_ALL_ONE  3
80
81 static unsigned   dump_flags  = 0;
82 static unsigned   style_flags = 0;
83 static unsigned   do_stats    = 0;
84 static cost_fct_t cost_func   = co_get_costs_exec_freq;
85 static unsigned   algo        = CO_ALGO_HEUR4;
86 static int        improve     = 1;
87
88 static const lc_opt_enum_mask_items_t dump_items[] = {
89         { "before",  DUMP_BEFORE },
90         { "after",   DUMP_AFTER  },
91         { "appel",   DUMP_APPEL  },
92         { "all",     DUMP_ALL    },
93         { NULL,      0 }
94 };
95
96 static const lc_opt_enum_mask_items_t style_items[] = {
97         { "color",   CO_IFG_DUMP_COLORS },
98         { "labels",  CO_IFG_DUMP_LABELS },
99         { "constr",  CO_IFG_DUMP_CONSTR },
100         { "shape",   CO_IFG_DUMP_SHAPE  },
101         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
102         { NULL,      0 }
103 };
104
105 static const lc_opt_enum_mask_items_t algo_items[] = {
106         { "none",   CO_ALGO_NONE  },
107         { "heur",   CO_ALGO_HEUR  },
108         { "heur2",  CO_ALGO_HEUR2 },
109         { "heur3",  CO_ALGO_HEUR3 },
110         { "heur4",  CO_ALGO_HEUR4 },
111         { "ilp",    CO_ALGO_ILP   },
112         { NULL,     0 }
113 };
114
115 typedef int (*opt_funcptr)(void);
116
117 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
118         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
119         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
120         { "one",    (opt_funcptr) co_get_costs_all_one },
121         { NULL,     NULL }
122 };
123
124 static lc_opt_enum_mask_var_t dump_var = {
125         &dump_flags, dump_items
126 };
127
128 static lc_opt_enum_mask_var_t style_var = {
129         &style_flags, style_items
130 };
131
132 static lc_opt_enum_mask_var_t algo_var = {
133         &algo, algo_items
134 };
135
136 static lc_opt_enum_func_ptr_var_t cost_func_var = {
137         (opt_funcptr*) &cost_func, cost_func_items
138 };
139
140 static const lc_opt_table_entry_t options[] = {
141         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
142         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
143         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
144         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
145         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
146         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
147         LC_OPT_LAST
148 };
149
150 /* Insert additional options registration functions here. */
151 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
152 extern void be_co2_register_options(lc_opt_entry_t *grp);
153 extern void be_co3_register_options(lc_opt_entry_t *grp);
154
155 void be_init_copycoal(void)
156 {
157         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
158         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
159         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
160         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
161
162         lc_opt_add_table(co_grp, options);
163 }
164
165 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
166
167 #undef QUICK_AND_DIRTY_HACK
168
169 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
170 {
171         if (env->ifg)
172                 return be_ifg_connected(env->ifg, a, b);
173         else
174                 return values_interfere(env->birg, a, b);
175 }
176
177
178 /******************************************************************************
179     _____                           _
180    / ____|                         | |
181   | |  __  ___ _ __   ___ _ __ __ _| |
182   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
183   | |__| |  __/ | | |  __/ | | (_| | |
184    \_____|\___|_| |_|\___|_|  \__,_|_|
185
186  ******************************************************************************/
187
188 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
189
190
191 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
192 {
193         const char *s1, *s2, *s3;
194         int len;
195         copy_opt_t *co;
196
197         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
198
199         co = xcalloc(1, sizeof(*co));
200         co->cenv      = chordal_env;
201         co->aenv      = chordal_env->birg->main_env->arch_env;
202         co->irg       = chordal_env->irg;
203         co->cls       = chordal_env->cls;
204         co->get_costs = get_costs;
205
206         s1 = get_irp_prog_name();
207         s2 = get_entity_name(get_irg_entity(co->irg));
208         s3 = chordal_env->cls->name;
209         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
210         co->name = xmalloc(len);
211         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
212
213         return co;
214 }
215
216 void free_copy_opt(copy_opt_t *co) {
217         xfree(co->name);
218         free(co);
219 }
220
221 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
222         const arch_register_req_t *req;
223         const arch_register_t *reg;
224
225         if (arch_irn_is(co->aenv, irn, ignore))
226                 return 0;
227
228         reg = arch_get_irn_register(co->aenv, irn);
229         if (arch_register_type_is(reg, ignore))
230                 return 0;
231
232         req = arch_get_register_req(co->aenv, irn, -1);
233         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
234                 return 1;
235
236         return 0;
237 }
238
239 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
240         int cost = 0;
241         ir_loop *loop;
242         ir_node *root_block = get_nodes_block(root);
243         (void) co;
244         (void) arg;
245
246         if (is_Phi(root)) {
247                 /* for phis the copies are placed in the corresponding pred-block */
248                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
249         } else {
250                 /* a perm places the copy in the same block as it resides */
251                 loop = get_irn_loop(root_block);
252         }
253         if (loop) {
254                 int d = get_loop_depth(loop);
255                 cost = d*d;
256         }
257         return 1+cost;
258 }
259
260 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
261         int res;
262         ir_node *root_bl = get_nodes_block(root);
263         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
264         (void) arg;
265         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
266
267         /* don't allow values smaller than one. */
268         return res < 1 ? 1 : res;
269 }
270
271
272 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
273         (void) co;
274         (void) root;
275         (void) arg;
276         (void) pos;
277         return 1;
278 }
279
280 /******************************************************************************
281    ____        _   _    _       _ _          _____ _
282   / __ \      | | | |  | |     (_) |        / ____| |
283  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
284  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
285  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
286   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
287         | |                                                            __/ |
288         |_|                                                           |___/
289  ******************************************************************************/
290
291 /**
292  * Determines a maximum weighted independent set with respect to
293  * the interference and conflict edges of all nodes in a qnode.
294  */
295 static int ou_max_ind_set_costs(unit_t *ou) {
296         be_chordal_env_t *chordal_env = ou->co->cenv;
297         ir_node **safe, **unsafe;
298         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
299         bitset_t *curr;
300         bitset_pos_t pos;
301         int max, curr_weight, best_weight = 0;
302
303         /* assign the nodes into two groups.
304          * safe: node has no interference, hence it is in every max stable set.
305          * unsafe: node has an interference
306          */
307         safe = alloca((ou->node_count-1) * sizeof(*safe));
308         safe_costs = 0;
309         safe_count = 0;
310         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
311         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
312         unsafe_count = 0;
313         for(i=1; i<ou->node_count; ++i) {
314                 int is_safe = 1;
315                 for(o=1; o<ou->node_count; ++o) {
316                         if (i==o)
317                                 continue;
318                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
319                                 unsafe_costs[unsafe_count] = ou->costs[i];
320                                 unsafe[unsafe_count] = ou->nodes[i];
321                                 ++unsafe_count;
322                                 is_safe = 0;
323                                 break;
324                         }
325                 }
326                 if (is_safe) {
327                         safe_costs += ou->costs[i];
328                         safe[safe_count++] = ou->nodes[i];
329                 }
330         }
331
332
333         /* now compute the best set out of the unsafe nodes*/
334         if (unsafe_count > MIS_HEUR_TRIGGER) {
335                 bitset_t *best = bitset_alloca(unsafe_count);
336                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
337                 for (i=0; i<unsafe_count; ++i) {
338                         bitset_set(best, i);
339                         /* check if it is a stable set */
340                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
341                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
342                                         bitset_clear(best, i); /* clear the bit and try next one */
343                                         break;
344                                 }
345                 }
346                 /* compute the weight */
347                 bitset_foreach(best, pos)
348                         best_weight += unsafe_costs[pos];
349         } else {
350                 /* Exact Algorithm: Brute force */
351                 curr = bitset_alloca(unsafe_count);
352                 bitset_set_all(curr);
353                 while ((max = bitset_popcnt(curr)) != 0) {
354                         /* check if curr is a stable set */
355                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
356                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
357                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
358                                                         goto no_stable_set;
359
360                         /* if we arrive here, we have a stable set */
361                         /* compute the weigth of the stable set*/
362                         curr_weight = 0;
363                         bitset_foreach(curr, pos)
364                                 curr_weight += unsafe_costs[pos];
365
366                         /* any better ? */
367                         if (curr_weight > best_weight) {
368                                 best_weight = curr_weight;
369                         }
370
371         no_stable_set:
372                         bitset_minus1(curr);
373                 }
374         }
375
376         return safe_costs+best_weight;
377 }
378
379 static void co_collect_units(ir_node *irn, void *env) {
380         copy_opt_t *co = env;
381         unit_t *unit;
382
383         if (!is_curr_reg_class(co, irn))
384                 return;
385         if (!co_is_optimizable_root(co, irn))
386                 return;
387
388         /* Init a new unit */
389         unit = xcalloc(1, sizeof(*unit));
390         unit->co = co;
391         unit->node_count = 1;
392         INIT_LIST_HEAD(&unit->queue);
393
394         /* Phi with some/all of its arguments */
395         if (is_Reg_Phi(irn)) {
396                 int i, arity;
397
398                 /* init */
399                 arity = get_irn_arity(irn);
400                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
401                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
402                 unit->nodes[0] = irn;
403
404                 /* fill */
405                 for (i=0; i<arity; ++i) {
406                         int o, arg_pos;
407                         ir_node *arg = get_irn_n(irn, i);
408
409                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
410                         if (arg == irn)
411                                 continue;
412                         if (nodes_interfere(co->cenv, irn, arg)) {
413                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
414                                 continue;
415                         }
416
417                         /* Else insert the argument of the phi to the members of this ou */
418                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
419
420                         if (! arch_irn_is(co->aenv, arg, ignore)) {
421                                 /* Check if arg has occurred at a prior position in the arg/list */
422                                 arg_pos = 0;
423                                 for (o=1; o<unit->node_count; ++o) {
424                                         if (unit->nodes[o] == arg) {
425                                                 arg_pos = o;
426                                                 break;
427                                         }
428                                 }
429
430                                 if (!arg_pos) { /* a new argument */
431                                         /* insert node, set costs */
432                                         unit->nodes[unit->node_count] = arg;
433                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
434                                         unit->node_count++;
435                                 } else { /* arg has occurred before in same phi */
436                                         /* increase costs for existing arg */
437                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
438                                 }
439                         }
440                 }
441                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
442                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
443         } else if (is_Perm_Proj(co->aenv, irn)) {
444                 /* Proj of a perm with corresponding arg */
445                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
446                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
447                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
448                 unit->node_count = 2;
449                 unit->nodes[0] = irn;
450                 unit->nodes[1] = get_Perm_src(irn);
451                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
452         } else {
453                 const arch_register_req_t *req =
454                         arch_get_register_req(co->aenv, irn, -1);
455
456                 /* Src == Tgt of a 2-addr-code instruction */
457                 if (is_2addr_code(req)) {
458                         const unsigned other = req->other_same;
459                         int            count = 0;
460                         int            i;
461
462                         for (i = 0; (1U << i) <= other; ++i) {
463                                 if (other & (1U << i)) {
464                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
465                                         if (!arch_irn_is(co->aenv, o, ignore) &&
466                                                         !nodes_interfere(co->cenv, irn, o)) {
467                                                 ++count;
468                                         }
469                                 }
470                         }
471
472                         if (count != 0) {
473                                 int k = 0;
474                                 ++count;
475                                 unit->nodes = xmalloc(count * sizeof(*unit->nodes));
476                                 unit->costs = xmalloc(count * sizeof(*unit->costs));
477                                 unit->node_count = count;
478                                 unit->nodes[k++] = irn;
479
480                                 for (i = 0; 1U << i <= other; ++i) {
481                                         if (other & (1U << i)) {
482                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
483                                                 if (!arch_irn_is(co->aenv, o, ignore) &&
484                                                                 !nodes_interfere(co->cenv, irn, o)) {
485                                                         unit->nodes[k] = o;
486                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
487                                                         ++k;
488                                                 }
489                                         }
490                                 }
491                         }
492                 } else {
493                         assert(0 && "This is not an optimizable node!");
494                 }
495         }
496
497         /* Insert the new unit at a position according to its costs */
498         if (unit->node_count > 1) {
499                 int i;
500                 struct list_head *tmp;
501
502                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
503                 for(i=1; i<unit->node_count; ++i) {
504                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
505                         unit->all_nodes_costs += unit->costs[i];
506                 }
507
508                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
509                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
510                 /* Insert the new ou according to its sort_key */
511                 tmp = &co->units;
512                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
513                         tmp = tmp->next;
514                 list_add(&unit->units, tmp);
515         } else {
516                 free(unit);
517         }
518 }
519
520 #ifdef QUICK_AND_DIRTY_HACK
521
522 static int compare_ous(const void *k1, const void *k2) {
523         const unit_t *u1 = *((const unit_t **) k1);
524         const unit_t *u2 = *((const unit_t **) k2);
525         int i, o, u1_has_constr, u2_has_constr;
526         arch_register_req_t req;
527         const arch_env_t *aenv = u1->co->aenv;
528
529         /* Units with constraints come first */
530         u1_has_constr = 0;
531         for (i=0; i<u1->node_count; ++i) {
532                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
533                 if (arch_register_req_is(&req, limited)) {
534                         u1_has_constr = 1;
535                         break;
536                 }
537         }
538
539         u2_has_constr = 0;
540         for (i=0; i<u2->node_count; ++i) {
541                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
542                 if (arch_register_req_is(&req, limited)) {
543                         u2_has_constr = 1;
544                         break;
545                 }
546         }
547
548         if (u1_has_constr != u2_has_constr)
549                 return u2_has_constr - u1_has_constr;
550
551         /* Now check, whether the two units are connected */
552 #if 0
553         for (i=0; i<u1->node_count; ++i)
554                 for (o=0; o<u2->node_count; ++o)
555                         if (u1->nodes[i] == u2->nodes[o])
556                                 return 0;
557 #endif
558
559         /* After all, the sort key decides. Greater keys come first. */
560         return u2->sort_key - u1->sort_key;
561
562 }
563
564 /**
565  * Sort the ou's according to constraints and their sort_key
566  */
567 static void co_sort_units(copy_opt_t *co) {
568         int i, count = 0, costs;
569         unit_t *ou, **ous;
570
571         /* get the number of ous, remove them form the list and fill the array */
572         list_for_each_entry(unit_t, ou, &co->units, units)
573                 count++;
574         ous = alloca(count * sizeof(*ous));
575
576         costs = co_get_max_copy_costs(co);
577
578         i = 0;
579         list_for_each_entry(unit_t, ou, &co->units, units)
580                 ous[i++] = ou;
581
582         INIT_LIST_HEAD(&co->units);
583
584         assert(count == i && list_empty(&co->units));
585
586         for (i=0; i<count; ++i)
587                 ir_printf("%+F\n", ous[i]->nodes[0]);
588
589         qsort(ous, count, sizeof(*ous), compare_ous);
590
591         ir_printf("\n\n");
592         for (i=0; i<count; ++i)
593                 ir_printf("%+F\n", ous[i]->nodes[0]);
594
595         /* reinsert into list in correct order */
596         for (i=0; i<count; ++i)
597                 list_add_tail(&ous[i]->units, &co->units);
598
599         assert(costs == co_get_max_copy_costs(co));
600 }
601 #endif
602
603 void co_build_ou_structure(copy_opt_t *co) {
604         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
605         INIT_LIST_HEAD(&co->units);
606         irg_walk_graph(co->irg, co_collect_units, NULL, co);
607 #ifdef QUICK_AND_DIRTY_HACK
608         co_sort_units(co);
609 #endif
610 }
611
612 void co_free_ou_structure(copy_opt_t *co) {
613         unit_t *curr, *tmp;
614         ASSERT_OU_AVAIL(co);
615         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
616                 xfree(curr->nodes);
617                 xfree(curr->costs);
618                 xfree(curr);
619         }
620         co->units.next = NULL;
621 }
622
623 /* co_solve_heuristic() is implemented in becopyheur.c */
624
625 int co_get_max_copy_costs(const copy_opt_t *co) {
626         int i, res = 0;
627         unit_t *curr;
628
629         ASSERT_OU_AVAIL(co);
630
631         list_for_each_entry(unit_t, curr, &co->units, units) {
632                 res += curr->inevitable_costs;
633                 for (i=1; i<curr->node_count; ++i)
634                         res += curr->costs[i];
635         }
636         return res;
637 }
638
639 int co_get_inevit_copy_costs(const copy_opt_t *co) {
640         int res = 0;
641         unit_t *curr;
642
643         ASSERT_OU_AVAIL(co);
644
645         list_for_each_entry(unit_t, curr, &co->units, units)
646                 res += curr->inevitable_costs;
647         return res;
648 }
649
650 int co_get_copy_costs(const copy_opt_t *co) {
651         int i, res = 0;
652         unit_t *curr;
653
654         ASSERT_OU_AVAIL(co);
655
656         list_for_each_entry(unit_t, curr, &co->units, units) {
657                 int root_col = get_irn_col(co, curr->nodes[0]);
658                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
659                 res += curr->inevitable_costs;
660                 for (i=1; i<curr->node_count; ++i) {
661                         int arg_col = get_irn_col(co, curr->nodes[i]);
662                         if (root_col != arg_col) {
663                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
664                                 res += curr->costs[i];
665                         }
666                 }
667         }
668         return res;
669 }
670
671 int co_get_lower_bound(const copy_opt_t *co) {
672         int res = 0;
673         unit_t *curr;
674
675         ASSERT_OU_AVAIL(co);
676
677         list_for_each_entry(unit_t, curr, &co->units, units)
678                 res += curr->inevitable_costs + curr->min_nodes_costs;
679         return res;
680 }
681
682 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
683 {
684         bitset_t *seen = bitset_irg_malloc(co->irg);
685         affinity_node_t *an;
686
687         memset(stat, 0, sizeof(stat[0]));
688
689         /* count affinity edges. */
690         co_gs_foreach_aff_node(co, an) {
691                 neighb_t *neigh;
692                 stat->aff_nodes += 1;
693                 bitset_add_irn(seen, an->irn);
694                 co_gs_foreach_neighb(an, neigh) {
695                         if(!bitset_contains_irn(seen, neigh->irn)) {
696                                 stat->aff_edges += 1;
697                                 stat->max_costs += neigh->costs;
698
699                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
700                                         stat->costs += neigh->costs;
701                                         stat->unsatisfied_edges += 1;
702                                 }
703
704                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
705                                         stat->aff_int += 1;
706                                         stat->inevit_costs += neigh->costs;
707                                 }
708
709                         }
710                 }
711         }
712
713         bitset_free(seen);
714 }
715
716 /******************************************************************************
717    _____                 _        _____ _
718   / ____|               | |      / ____| |
719  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
720  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
721  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
722   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
723                   | |                                       __/ |
724                   |_|                                      |___/
725  ******************************************************************************/
726
727 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
728         const affinity_node_t *n1 = k1;
729         const affinity_node_t *n2 = k2;
730         (void) size;
731
732         return (n1->irn != n2->irn);
733 }
734
735 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
736         affinity_node_t new_node, *node;
737         neighb_t        *nbr;
738         int             allocnew = 1;
739
740         new_node.irn        = n1;
741         new_node.degree     = 0;
742         new_node.neighbours = NULL;
743         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
744
745         for (nbr = node->neighbours; nbr; nbr = nbr->next)
746                 if (nbr->irn == n2) {
747                         allocnew = 0;
748                         break;
749                 }
750
751         /* if we did not find n2 in n1's neighbourhood insert it */
752         if (allocnew) {
753                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
754                 nbr->irn   = n2;
755                 nbr->costs = 0;
756                 nbr->next  = node->neighbours;
757
758                 node->neighbours = nbr;
759                 node->degree++;
760         }
761
762         /* now nbr points to n1's neighbour-entry of n2 */
763         nbr->costs += costs;
764 }
765
766 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
767         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
768                 add_edge(co, n1, n2, costs);
769                 add_edge(co, n2, n1, costs);
770         }
771 }
772
773 static void build_graph_walker(ir_node *irn, void *env) {
774         copy_opt_t *co = env;
775         int pos, max;
776         const arch_register_t *reg;
777
778         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
779                 return;
780
781         reg = arch_get_irn_register(co->aenv, irn);
782         if (arch_register_type_is(reg, ignore))
783                 return;
784
785         if (is_Reg_Phi(irn)) { /* Phis */
786                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
787                         ir_node *arg = get_irn_n(irn, pos);
788                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
789                 }
790         }
791         else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
792                 ir_node *arg = get_Perm_src(irn);
793                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
794         }
795         else { /* 2-address code */
796                 const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
797                 if (is_2addr_code(req)) {
798                         const unsigned other = req->other_same;
799                         int i;
800
801                         for (i = 0; 1U << i <= other; ++i) {
802                                 if (other & (1U << i)) {
803                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
804                                         if (! arch_irn_is(co->aenv, other, ignore))
805                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
806                                 }
807                         }
808                 }
809         }
810 }
811
812 void co_build_graph_structure(copy_opt_t *co) {
813         obstack_init(&co->obst);
814         co->nodes = new_set(compare_affinity_node_t, 32);
815
816         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
817 }
818
819 void co_free_graph_structure(copy_opt_t *co) {
820         ASSERT_GS_AVAIL(co);
821
822         del_set(co->nodes);
823         obstack_free(&co->obst, NULL);
824         co->nodes = NULL;
825 }
826
827 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
828
829 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
830         affinity_node_t new_node, *n;
831
832         ASSERT_GS_AVAIL(co);
833
834         new_node.irn = irn;
835         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
836         if (n) {
837                 return (n->degree > 0);
838         } else
839                 return 0;
840 }
841
842 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
843 {
844         ir_node *nodes[]  = { a, b };
845         bitset_t *constr[] = { NULL, NULL };
846         const arch_register_req_t *req;
847         int j;
848
849         constr[0] = bitset_alloca(co->cls->n_regs);
850         constr[1] = bitset_alloca(co->cls->n_regs);
851
852         for (j = 0; j < 2; ++j) {
853                 req = arch_get_register_req(co->aenv, nodes[j], BE_OUT_POS(0));
854                 if(arch_register_req_is(req, limited))
855                         rbitset_copy_to_bitset(req->limited, constr[j]);
856                 else
857                         bitset_set_all(constr[j]);
858
859         }
860
861         return !bitset_intersect(constr[0], constr[1]);
862 }
863
864 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
865 {
866         be_ifg_t *ifg  = co->cenv->ifg;
867         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
868         int *node_map  = xmalloc((get_irg_last_idx(co->irg) + 1) * sizeof(node_map[0]));
869
870         ir_node *irn;
871         void *it, *nit;
872         int n, n_regs;
873         unsigned i, j;
874
875         n_regs = 0;
876         for(i = 0; i < co->cls->n_regs; ++i) {
877                 const arch_register_t *reg = &co->cls->regs[i];
878                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
879         }
880
881         /*
882          * n contains the first node number.
883          * the values below n are the pre-colored register nodes
884          */
885
886         it  = be_ifg_nodes_iter_alloca(ifg);
887         nit = be_ifg_neighbours_iter_alloca(ifg);
888
889         n = n_regs;
890         be_ifg_foreach_node(ifg, it, irn) {
891                 if(!arch_irn_is(co->aenv, irn, ignore))
892                         node_map[get_irn_idx(irn)] = n++;
893         }
894
895         fprintf(f, "%d %d\n", n, n_regs);
896
897 #if 0
898         printf("debut\n");
899         for (i = 0; i < n_regs; ++i) {
900                 for (j = 0; j < i; ++j) {
901                         fprintf(f, "%d %d -1\n", i, j);
902                         printf("%d %d\n", i, j);
903                 }
904         }
905         printf("fin\n");
906 #endif
907
908         be_ifg_foreach_node(ifg, it, irn) {
909                 if(!arch_irn_is(co->aenv, irn, ignore)) {
910                         int idx            = node_map[get_irn_idx(irn)];
911                         affinity_node_t *a = get_affinity_info(co, irn);
912
913                         const arch_register_req_t *req;
914                         ir_node *adj;
915
916                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
917                         if(arch_register_req_is(req, limited)) {
918                                 for(i = 0; i < co->cls->n_regs; ++i) {
919                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
920                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
921                                 }
922                         }
923
924                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
925                                 if(!arch_irn_is(co->aenv, adj, ignore) && !co_dump_appel_disjoint_constraints(co, irn, adj)) {
926                                         int adj_idx = node_map[get_irn_idx(adj)];
927                                         if(idx < adj_idx)
928                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
929                                 }
930                         }
931
932                         if(a) {
933                                 neighb_t *n;
934
935                                 co_gs_foreach_neighb(a, n) {
936                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
937                                                 int n_idx = node_map[get_irn_idx(n->irn)];
938                                                 if(idx < n_idx)
939                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
940                                         }
941                                 }
942                         }
943                 }
944         }
945
946         xfree(node_map);
947 }
948
949 typedef struct _appel_clique_walker_t {
950         ir_phase ph;
951         const copy_opt_t *co;
952         int curr_nr;
953         int node_count;
954         FILE *f;
955         int dumb;
956         int *color_map;
957         struct obstack obst;
958 } appel_clique_walker_t;
959
960 typedef struct _appel_block_info_t {
961         int *live_end_nr;
962         int *live_in_nr;
963         int *phi_nr;
964         ir_node **live_end;
965         ir_node **live_in;
966         ir_node **phi;
967         int n_live_end;
968         int n_live_in;
969         int n_phi;
970 } appel_block_info_t;
971
972 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
973 {
974 #if 0
975         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
976         int res = (int) freq;
977         return res == 0 ? 1 : res;
978 #else
979         ir_loop *loop = get_irn_loop(bl);
980         (void) env;
981         if(loop) {
982                 int d = get_loop_depth(loop);
983                 return 1 + d * d;
984         }
985         return 1;
986 #endif
987 }
988
989 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
990 {
991         appel_block_info_t *res = NULL;
992         (void) old;
993
994         if(is_Block(irn)) {
995                 appel_clique_walker_t *d = (void *) phase;
996                 res = phase_alloc(phase, sizeof(res[0]));
997                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
998                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
999                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
1000                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
1001                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
1002                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
1003         }
1004
1005         return res;
1006 }
1007
1008 typedef struct _insn_list_t {
1009         be_insn_t *insn;
1010         struct list_head list;
1011 } insn_list_t;
1012
1013 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
1014 {
1015         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
1016         int i;
1017
1018         for(i = 0; i < bli->n_live_end; ++i)
1019                 if(bli->live_end[i] == irn)
1020                         return bli->live_end_nr[i];
1021
1022         return -1;
1023 }
1024
1025 static int appel_dump_clique(appel_clique_walker_t *env, const ir_nodeset_t *live, ir_node *bl, int curr_nr, int start_nr)
1026 {
1027         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
1028         ir_node *irn;
1029         int n_live;
1030         int j;
1031         ir_nodeset_iterator_t iter;
1032
1033         n_live = 0;
1034         foreach_ir_nodeset(live, irn, iter)
1035                 live_arr[n_live++] = irn;
1036
1037         /* dump the live after clique */
1038         if(!env->dumb) {
1039                 for(j = 0; j < n_live; ++j) {
1040                         int k;
1041
1042                         for(k = j + 1; k < n_live; ++k) {
1043                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
1044                         }
1045                         fprintf(env->f, "\n");
1046                 }
1047         }
1048
1049         /* dump the affinities */
1050         for(j = 0; !env->dumb && j < n_live; ++j) {
1051                 ir_node *irn = live_arr[j];
1052                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1053
1054                 /* if the node was already live in the last insn dump the affinity */
1055                 if(old_nr > start_nr) {
1056                         int weight = appel_aff_weight(env, bl);
1057                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1058                 }
1059         }
1060
1061         /* set the current numbers into the link field. */
1062         for(j = 0; j < n_live; ++j) {
1063                 ir_node *irn = live_arr[j];
1064                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1065         }
1066
1067         return curr_nr + n_live;
1068 }
1069
1070 static void appel_walker(ir_node *bl, void *data)
1071 {
1072         appel_clique_walker_t *env = data;
1073         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1074         struct obstack *obst       = &env->obst;
1075         void *base                 = obstack_base(obst);
1076         ir_nodeset_t live;
1077         ir_nodeset_iterator_t iter;
1078         be_lv_t *lv                = env->co->cenv->birg->lv;
1079
1080         int n_insns  = 0;
1081         int n_nodes  = 0;
1082         int start_nr = env->curr_nr;
1083         int curr_nr  = start_nr;
1084
1085         be_insn_env_t insn_env;
1086         int i, j;
1087         ir_node *irn;
1088         be_insn_t **insns;
1089
1090         insn_env.aenv = env->co->aenv;
1091         insn_env.cls  = env->co->cls;
1092         insn_env.obst = obst;
1093         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1094
1095         /* Guess how many insns will be in this block. */
1096         sched_foreach(bl, irn)
1097                 n_nodes++;
1098
1099         bli->n_phi = 0;
1100         insns = xmalloc(n_nodes * sizeof(insns[0]));
1101
1102         /* Put all insns in an array. */
1103         irn = sched_first(bl);
1104         while(!sched_is_end(irn)) {
1105                 be_insn_t *insn;
1106                 insn = be_scan_insn(&insn_env, irn);
1107                 insns[n_insns++] = insn;
1108                 irn = insn->next_insn;
1109         }
1110
1111         DBG((dbg, LEVEL_2, "%+F\n", bl));
1112         ir_nodeset_init(&live);
1113         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, &live);
1114
1115         /* Generate the bad and ugly. */
1116         for(i = n_insns - 1; i >= 0; --i) {
1117                 be_insn_t *insn = insns[i];
1118
1119                 /* The first live set has to be saved in the block border set. */
1120                 if(i == n_insns - 1) {
1121                         j = 0;
1122                         foreach_ir_nodeset(&live, irn, iter) {
1123                                 bli->live_end[j]    = irn;
1124                                 bli->live_end_nr[j] = curr_nr + j;
1125                                 ++j;
1126                         }
1127                         bli->n_live_end = j;
1128                 }
1129
1130                 if(!env->dumb) {
1131                         for(j = 0; j < insn->use_start; ++j) {
1132                                 ir_node *op   = insn->ops[j].carrier;
1133                                 bitset_t *adm = insn->ops[j].regs;
1134                                 unsigned k;
1135                                 size_t nr;
1136
1137                                 if(!insn->ops[j].has_constraints)
1138                                         continue;
1139
1140                                 nr = 0;
1141                                 foreach_ir_nodeset(&live, irn, iter) {
1142                                         if(irn == op) {
1143                                                 break;
1144                                         }
1145                                         ++nr;
1146                                 }
1147
1148                                 assert(nr < ir_nodeset_size(&live));
1149
1150                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1151                                         int mapped_col = env->color_map[k];
1152                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1153                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1154                                 }
1155                         }
1156                 }
1157
1158                 /* dump the clique and update the stuff. */
1159                 curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1160
1161                 /* remove all defs. */
1162                 for(j = 0; j < insn->use_start; ++j)
1163                         ir_nodeset_remove(&live, insn->ops[j].carrier);
1164
1165                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1166                         bli->phi[bli->n_phi]    = insn->irn;
1167                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1168                         bli->n_phi++;
1169                 }
1170
1171                 /* add all uses */
1172                 else
1173                         for(j = insn->use_start; j < insn->n_ops; ++j)
1174                                 ir_nodeset_insert(&live, insn->ops[j].carrier);
1175         }
1176
1177         /* print the start clique. */
1178         curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1179
1180         i = 0;
1181         foreach_ir_nodeset(&live, irn, iter) {
1182                 bli->live_in[i]    = irn;
1183                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1184                 ++i;
1185         }
1186         bli->n_live_in = i;
1187
1188         ir_nodeset_destroy(&live);
1189         free(insns);
1190         obstack_free(obst, base);
1191         env->curr_nr = curr_nr;
1192 }
1193
1194 static void appel_inter_block_aff(ir_node *bl, void *data)
1195 {
1196         appel_clique_walker_t *env = data;
1197         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1198
1199         int i, j, n;
1200
1201         for(i = 0; i < bli->n_live_in; ++i) {
1202                 ir_node *irn = bli->live_in[i];
1203
1204                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1205                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1206
1207                         int nr = appel_get_live_end_nr(env, pred, irn);
1208                         assert(nr >= 0);
1209                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1210                 }
1211         }
1212
1213         for(i = 0; i < bli->n_phi; ++i) {
1214                 ir_node *irn = bli->phi[i];
1215
1216                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1217                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1218                         ir_node *op = get_irn_n(irn, j);
1219
1220                         int nr = appel_get_live_end_nr(env, pred, op);
1221                         assert(nr >= 0);
1222                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1223                 }
1224         }
1225
1226 }
1227
1228 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1229 {
1230         unsigned i;
1231         unsigned n_colors;
1232         appel_clique_walker_t env;
1233         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1234         be_lv_t *lv = co->cenv->birg->lv;
1235
1236         be_liveness_recompute(lv);
1237         obstack_init(&env.obst);
1238         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1239         env.curr_nr = co->cls->n_regs;
1240         env.co = co;
1241         env.f = f;
1242
1243         bitset_copy(adm, co->cenv->ignore_colors);
1244         bitset_flip_all(adm);
1245
1246         /* Make color map. */
1247         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1248         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1249                 const arch_register_t *reg = &co->cls->regs[i];
1250                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : (int) n_colors++;
1251         }
1252
1253         env.dumb = 1;
1254         env.curr_nr = n_colors;
1255         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1256         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1257
1258         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1259
1260         /* make the first k nodes interfere */
1261         for(i = 0; i < n_colors; ++i) {
1262                 unsigned j;
1263                 for(j = i + 1; j < n_colors; ++j)
1264                         fprintf(f, "%d %d -1 ", i, j);
1265                 fprintf(f, "\n");
1266         }
1267
1268         env.dumb = 0;
1269         env.curr_nr = n_colors;
1270         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1271         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1272         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1273         obstack_free(&env.obst, NULL);
1274 }
1275
1276 /*
1277          ___ _____ ____   ____   ___ _____   ____                        _
1278         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1279          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1280          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1281         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1282                                                                   |_|            |___/
1283 */
1284
1285 static const char *get_dot_color_name(size_t col)
1286 {
1287         static const char *names[] = {
1288                 "blue",
1289                 "red",
1290                 "green",
1291                 "yellow",
1292                 "cyan",
1293                 "magenta",
1294                 "orange",
1295                 "chocolate",
1296                 "beige",
1297                 "navy",
1298                 "darkgreen",
1299                 "darkred",
1300                 "lightPink",
1301                 "chartreuse",
1302                 "lightskyblue",
1303                 "linen",
1304                 "pink",
1305                 "lightslateblue",
1306                 "mintcream",
1307                 "red",
1308                 "darkolivegreen",
1309                 "mediumblue",
1310                 "mistyrose",
1311                 "salmon",
1312                 "darkseagreen",
1313                 "mediumslateblue"
1314                 "moccasin",
1315                 "tomato",
1316                 "forestgreen",
1317                 "darkturquoise",
1318                 "palevioletred"
1319         };
1320
1321         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1322 }
1323
1324 typedef struct _co_ifg_dump_t {
1325         const copy_opt_t *co;
1326         unsigned flags;
1327 } co_ifg_dump_t;
1328
1329 static void ifg_dump_graph_attr(FILE *f, void *self)
1330 {
1331         (void) self;
1332         fprintf(f, "overlap=scale");
1333 }
1334
1335 static int ifg_is_dump_node(void *self, ir_node *irn)
1336 {
1337         co_ifg_dump_t *cod = self;
1338         return !arch_irn_is(cod->co->aenv, irn, ignore);
1339 }
1340
1341 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1342 {
1343         co_ifg_dump_t *env         = self;
1344         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1345         const arch_register_req_t *req;
1346         int limited;
1347
1348         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1349         limited = arch_register_req_is(req, limited);
1350
1351         if(env->flags & CO_IFG_DUMP_LABELS) {
1352                 ir_fprintf(f, "label=\"%+F", irn);
1353
1354                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1355                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1356                         rbitset_copy_to_bitset(req->limited, bs);
1357                         ir_fprintf(f, "\\n%B", bs);
1358                 }
1359                 ir_fprintf(f, "\" ");
1360         } else {
1361                 fprintf(f, "label=\"\" shape=point " );
1362         }
1363
1364         if(env->flags & CO_IFG_DUMP_SHAPE)
1365                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1366
1367         if(env->flags & CO_IFG_DUMP_COLORS)
1368                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1369 }
1370
1371 static void ifg_dump_at_end(FILE *file, void *self)
1372 {
1373         co_ifg_dump_t *env = self;
1374         affinity_node_t *a;
1375
1376         co_gs_foreach_aff_node(env->co, a) {
1377                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1378                 unsigned aidx = get_irn_idx(a->irn);
1379                 neighb_t *n;
1380
1381                 co_gs_foreach_neighb(a, n) {
1382                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1383                         unsigned nidx = get_irn_idx(n->irn);
1384
1385                         if(aidx < nidx) {
1386                                 const char *color = nr == ar ? "blue" : "red";
1387                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1388                                 if(env->flags & CO_IFG_DUMP_LABELS)
1389                                         fprintf(file, "label=\"%d\" ", n->costs);
1390                                 if(env->flags & CO_IFG_DUMP_COLORS)
1391                                         fprintf(file, "color=%s ", color);
1392                                 else
1393                                         fprintf(file, "style=dotted");
1394                                 fprintf(file, "];\n");
1395                         }
1396                 }
1397         }
1398 }
1399
1400
1401 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1402         ifg_is_dump_node,
1403         ifg_dump_graph_attr,
1404         ifg_dump_node_attr,
1405         NULL,
1406         NULL,
1407         ifg_dump_at_end
1408 };
1409
1410
1411
1412 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1413 {
1414         co_ifg_dump_t cod;
1415
1416         cod.co    = co;
1417         cod.flags = flags;
1418         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1419 }
1420
1421
1422 void co_solve_park_moon(copy_opt_t *opt)
1423 {
1424         (void) opt;
1425 }
1426
1427 static int void_algo(copy_opt_t *co)
1428 {
1429         (void) co;
1430         return 0;
1431 }
1432
1433 /*
1434                 _    _                  _ _   _
1435            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1436           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1437          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1438         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1439                            |___/
1440 */
1441
1442 typedef struct {
1443         co_algo_t  *algo;
1444         const char *name;
1445         int        can_improve_existing;
1446 } co_algo_info_t;
1447
1448 static co_algo_info_t algos[] = {
1449         { void_algo,               "none",  0 },
1450         { co_solve_heuristic,      "heur1", 0 },
1451         { co_solve_heuristic_new,  "heur2", 0 },
1452 #ifdef WITH_JVM
1453         { co_solve_heuristic_java, "heur3", 0 },
1454 #else
1455         { NULL,                    "heur3", 0 },
1456 #endif
1457         { co_solve_heuristic_mst,  "heur4", 0 },
1458 #ifdef WITH_ILP
1459         { co_solve_ilp2,           "ilp",   1 },
1460 #else
1461         { NULL,                    "ilp",   1 },
1462 #endif
1463         { NULL,                    "",      0 }
1464 };
1465
1466 /*
1467     __  __       _         ____       _
1468    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1469    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1470    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1471    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1472
1473 */
1474
1475 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1476 {
1477         FILE *result;
1478         char buf[1024];
1479         size_t i, n;
1480         char *tu_name;
1481
1482         n = strlen(env->birg->main_env->cup_name);
1483         tu_name = xmalloc((n + 1) * sizeof(*tu_name));
1484         strcpy(tu_name, env->birg->main_env->cup_name);
1485         for (i = 0; i < n; ++i)
1486                 if (tu_name[i] == '.')
1487                         tu_name[i] = '_';
1488
1489
1490         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1491         xfree(tu_name);
1492         result = fopen(buf, "wt");
1493         if(result == NULL) {
1494                 panic("Couldn't open '%s' for writing.", buf);
1495         }
1496
1497         return result;
1498 }
1499
1500 void co_driver(be_chordal_env_t *cenv)
1501 {
1502         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1503         co_complete_stats_t before, after;
1504         copy_opt_t          *co;
1505         co_algo_t           *algo_func;
1506         int                 was_optimal = 0;
1507
1508         if (algo >= CO_ALGO_LAST)
1509                 return;
1510
1511         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1512
1513         co = new_copy_opt(cenv, cost_func);
1514         co_build_ou_structure(co);
1515         co_build_graph_structure(co);
1516
1517         co_complete_stats(co, &before);
1518
1519         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1520         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1521         be_stat_ev_ull("co_max_costs",    before.max_costs);
1522         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1523         be_stat_ev_ull("co_aff_int",      before.aff_int);
1524
1525         be_stat_ev_ull("co_init_costs",   before.costs);
1526         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1527
1528         if (dump_flags & DUMP_BEFORE) {
1529                 FILE *f = my_open(cenv, "", "-before.dot");
1530                 co_dump_ifg_dot(co, f, style_flags);
1531                 fclose(f);
1532         }
1533
1534         /* if the algo can improve results, provide an initial solution with heur3 */
1535         if (improve && algos[algo].can_improve_existing) {
1536                 co_complete_stats_t stats;
1537
1538                 /* produce a heuristic solution */
1539 #ifdef WITH_JVM
1540                 co_solve_heuristic_java(co);
1541 #else
1542                 co_solve_heuristic(co);
1543 #endif /* WITH_JVM */
1544
1545                 /* do the stats and provide the current costs */
1546                 co_complete_stats(co, &stats);
1547                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1548         }
1549
1550 #ifdef WITH_JVM
1551         /* start the JVM here so that it does not tamper the timing. */
1552         if (algo == CO_ALGO_HEUR3)
1553                 be_java_coal_start_jvm();
1554 #endif /* WITH_JVM */
1555
1556         algo_func = algos[algo].algo;
1557
1558         /* perform actual copy minimization */
1559         lc_timer_reset_and_start(timer);
1560         was_optimal = algo_func(co);
1561         lc_timer_stop(timer);
1562
1563         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1564         be_stat_ev_ull("co_optimal", was_optimal);
1565
1566         if (dump_flags & DUMP_AFTER) {
1567                 FILE *f = my_open(cenv, "", "-after.dot");
1568                 co_dump_ifg_dot(co, f, style_flags);
1569                 fclose(f);
1570         }
1571
1572         co_complete_stats(co, &after);
1573
1574         if (do_stats) {
1575                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1576                 ulong64 evitable          = after.costs     - after.inevit_costs;
1577
1578                 ir_printf("%30F ", cenv->irg);
1579                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1580
1581                 if(optimizable_costs > 0)
1582                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1583                 else
1584                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1585         }
1586
1587         /* Dump the interference graph in Appel's format. */
1588         if (dump_flags & DUMP_APPEL) {
1589                 FILE *f = my_open(cenv, "", ".apl");
1590                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1591                 co_dump_appel_graph(co, f);
1592                 fclose(f);
1593         }
1594
1595         be_stat_ev_ull("co_after_costs", after.costs);
1596         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1597
1598         co_free_graph_structure(co);
1599         co_free_ou_structure(co);
1600         free_copy_opt(co);
1601 }