be_abi_put_ignore_regs returns now number of ignore registers as unsigned
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110 #ifdef WITH_JVM
111         { "heur3",  CO_ALGO_HEUR3 },
112 #endif /* WITH_JVM */
113         { "heur4",  CO_ALGO_HEUR4 },
114 #ifdef WITH_ILP
115         { "ilp",    CO_ALGO_ILP   },
116 #endif /* WITH_ILP */
117         { NULL,     0 }
118 };
119
120 typedef int (*opt_funcptr)(void);
121
122 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
123         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
124         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
125         { "one",    (opt_funcptr) co_get_costs_all_one },
126         { NULL,     NULL }
127 };
128
129 static lc_opt_enum_mask_var_t dump_var = {
130         &dump_flags, dump_items
131 };
132
133 static lc_opt_enum_mask_var_t style_var = {
134         &style_flags, style_items
135 };
136
137 static lc_opt_enum_mask_var_t algo_var = {
138         &algo, algo_items
139 };
140
141 static lc_opt_enum_func_ptr_var_t cost_func_var = {
142         (opt_funcptr*) &cost_func, cost_func_items
143 };
144
145 static const lc_opt_table_entry_t options[] = {
146         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
147         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
148         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
149         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
150         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
151         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
152         { NULL }
153 };
154
155 /* Insert additional options registration functions here. */
156 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
157 extern void be_co2_register_options(lc_opt_entry_t *grp);
158 extern void be_co3_register_options(lc_opt_entry_t *grp);
159
160 void be_init_copycoal(void)
161 {
162         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
163         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
164         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
165         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
166
167         lc_opt_add_table(co_grp, options);
168 }
169
170 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
171
172 #undef QUICK_AND_DIRTY_HACK
173
174 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
175 {
176         if (env->ifg)
177                 return be_ifg_connected(env->ifg, a, b);
178         else
179                 return values_interfere(env->birg, a, b);
180 }
181
182
183 /******************************************************************************
184     _____                           _
185    / ____|                         | |
186   | |  __  ___ _ __   ___ _ __ __ _| |
187   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
188   | |__| |  __/ | | |  __/ | | (_| | |
189    \_____|\___|_| |_|\___|_|  \__,_|_|
190
191  ******************************************************************************/
192
193 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
194
195
196 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
197 {
198         const char *s1, *s2, *s3;
199         int len;
200         copy_opt_t *co;
201
202         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
203
204         co = xcalloc(1, sizeof(*co));
205         co->cenv      = chordal_env;
206         co->aenv      = chordal_env->birg->main_env->arch_env;
207         co->irg       = chordal_env->irg;
208         co->cls       = chordal_env->cls;
209         co->get_costs = get_costs;
210
211         s1 = get_irp_prog_name();
212         s2 = get_entity_name(get_irg_entity(co->irg));
213         s3 = chordal_env->cls->name;
214         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
215         co->name = xmalloc(len);
216         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
217
218         return co;
219 }
220
221 void free_copy_opt(copy_opt_t *co) {
222         xfree(co->name);
223         free(co);
224 }
225
226 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
227         const arch_register_req_t *req;
228         const arch_register_t *reg;
229
230         if (arch_irn_is(co->aenv, irn, ignore))
231                 return 0;
232
233         reg = arch_get_irn_register(co->aenv, irn);
234         if (arch_register_type_is(reg, ignore))
235                 return 0;
236
237         req = arch_get_register_req(co->aenv, irn, -1);
238         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
245         int cost = 0;
246         ir_loop *loop;
247         ir_node *root_block = get_nodes_block(root);
248
249         if (is_Phi(root)) {
250                 /* for phis the copies are placed in the corresponding pred-block */
251                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
252         } else {
253                 /* a perm places the copy in the same block as it resides */
254                 loop = get_irn_loop(root_block);
255         }
256         if (loop) {
257                 int d = get_loop_depth(loop);
258                 cost = d*d;
259         }
260         return 1+cost;
261 }
262
263 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
264         int res;
265         ir_node *root_bl = get_nodes_block(root);
266         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
267         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
268
269         /* don't allow values smaller than one. */
270         return res < 1 ? 1 : res;
271 }
272
273
274 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
275         return 1;
276 }
277
278 /******************************************************************************
279    ____        _   _    _       _ _          _____ _
280   / __ \      | | | |  | |     (_) |        / ____| |
281  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
282  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
283  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
284   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
285         | |                                                            __/ |
286         |_|                                                           |___/
287  ******************************************************************************/
288
289 /**
290  * Determines a maximum weighted independent set with respect to
291  * the interference and conflict edges of all nodes in a qnode.
292  */
293 static int ou_max_ind_set_costs(unit_t *ou) {
294         be_chordal_env_t *chordal_env = ou->co->cenv;
295         ir_node **safe, **unsafe;
296         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
297         bitset_t *curr;
298         int max, pos, curr_weight, best_weight = 0;
299
300         /* assign the nodes into two groups.
301          * safe: node has no interference, hence it is in every max stable set.
302          * unsafe: node has an interference
303          */
304         safe = alloca((ou->node_count-1) * sizeof(*safe));
305         safe_costs = 0;
306         safe_count = 0;
307         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
308         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
309         unsafe_count = 0;
310         for(i=1; i<ou->node_count; ++i) {
311                 int is_safe = 1;
312                 for(o=1; o<ou->node_count; ++o) {
313                         if (i==o)
314                                 continue;
315                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
316                                 unsafe_costs[unsafe_count] = ou->costs[i];
317                                 unsafe[unsafe_count] = ou->nodes[i];
318                                 ++unsafe_count;
319                                 is_safe = 0;
320                                 break;
321                         }
322                 }
323                 if (is_safe) {
324                         safe_costs += ou->costs[i];
325                         safe[safe_count++] = ou->nodes[i];
326                 }
327         }
328
329
330         /* now compute the best set out of the unsafe nodes*/
331         if (unsafe_count > MIS_HEUR_TRIGGER) {
332                 bitset_t *best = bitset_alloca(unsafe_count);
333                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
334                 for (i=0; i<unsafe_count; ++i) {
335                         bitset_set(best, i);
336                         /* check if it is a stable set */
337                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
338                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
339                                         bitset_clear(best, i); /* clear the bit and try next one */
340                                         break;
341                                 }
342                 }
343                 /* compute the weight */
344                 bitset_foreach(best, pos)
345                         best_weight += unsafe_costs[pos];
346         } else {
347                 /* Exact Algorithm: Brute force */
348                 curr = bitset_alloca(unsafe_count);
349                 bitset_set_all(curr);
350                 while ((max = bitset_popcnt(curr)) != 0) {
351                         /* check if curr is a stable set */
352                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
353                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
354                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
355                                                         goto no_stable_set;
356
357                         /* if we arrive here, we have a stable set */
358                         /* compute the weigth of the stable set*/
359                         curr_weight = 0;
360                         bitset_foreach(curr, pos)
361                                 curr_weight += unsafe_costs[pos];
362
363                         /* any better ? */
364                         if (curr_weight > best_weight) {
365                                 best_weight = curr_weight;
366                         }
367
368         no_stable_set:
369                         bitset_minus1(curr);
370                 }
371         }
372
373         return safe_costs+best_weight;
374 }
375
376 static void co_collect_units(ir_node *irn, void *env) {
377         copy_opt_t *co = env;
378         unit_t *unit;
379
380         if (!is_curr_reg_class(co, irn))
381                 return;
382         if (!co_is_optimizable_root(co, irn))
383                 return;
384
385         /* Init a new unit */
386         unit = xcalloc(1, sizeof(*unit));
387         unit->co = co;
388         unit->node_count = 1;
389         INIT_LIST_HEAD(&unit->queue);
390
391         /* Phi with some/all of its arguments */
392         if (is_Reg_Phi(irn)) {
393                 int i, arity;
394
395                 /* init */
396                 arity = get_irn_arity(irn);
397                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
398                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
399                 unit->nodes[0] = irn;
400
401                 /* fill */
402                 for (i=0; i<arity; ++i) {
403                         int o, arg_pos;
404                         ir_node *arg = get_irn_n(irn, i);
405
406                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
407                         if (arg == irn)
408                                 continue;
409                         if (nodes_interfere(co->cenv, irn, arg)) {
410                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
411                                 continue;
412                         }
413
414                         /* Else insert the argument of the phi to the members of this ou */
415                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
416
417                         if (! arch_irn_is(co->aenv, arg, ignore)) {
418                                 /* Check if arg has occurred at a prior position in the arg/list */
419                                 arg_pos = 0;
420                                 for (o=1; o<unit->node_count; ++o) {
421                                         if (unit->nodes[o] == arg) {
422                                                 arg_pos = o;
423                                                 break;
424                                         }
425                                 }
426
427                                 if (!arg_pos) { /* a new argument */
428                                         /* insert node, set costs */
429                                         unit->nodes[unit->node_count] = arg;
430                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
431                                         unit->node_count++;
432                                 } else { /* arg has occured before in same phi */
433                                         /* increase costs for existing arg */
434                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
435                                 }
436                         }
437                 }
438                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
439                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
440         } else
441
442         /* Proj of a perm with corresponding arg */
443         if (is_Perm_Proj(co->aenv, irn)) {
444                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
445                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
446                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
447                 unit->node_count = 2;
448                 unit->nodes[0] = irn;
449                 unit->nodes[1] = get_Perm_src(irn);
450                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
451         } else {
452                 const arch_register_req_t *req =
453                         arch_get_register_req(co->aenv, irn, -1);
454
455                 /* Src == Tgt of a 2-addr-code instruction */
456                 if (is_2addr_code(req)) {
457                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
458                         if (!arch_irn_is(co->aenv, other, ignore) &&
459                                         !nodes_interfere(co->cenv, irn, other)) {
460                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
461                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
462                                 unit->node_count = 2;
463                                 unit->nodes[0] = irn;
464                                 unit->nodes[1] = other;
465                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
466                         }
467                 } else {
468                         assert(0 && "This is not an optimizable node!");
469                 }
470         }
471
472         /* Insert the new unit at a position according to its costs */
473         if (unit->node_count > 1) {
474                 int i;
475                 struct list_head *tmp;
476
477                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
478                 for(i=1; i<unit->node_count; ++i) {
479                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
480                         unit->all_nodes_costs += unit->costs[i];
481                 }
482
483                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
484                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
485                 /* Insert the new ou according to its sort_key */
486                 tmp = &co->units;
487                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
488                         tmp = tmp->next;
489                 list_add(&unit->units, tmp);
490         } else {
491                 free(unit);
492         }
493 }
494
495 #ifdef QUICK_AND_DIRTY_HACK
496
497 static int compare_ous(const void *k1, const void *k2) {
498         const unit_t *u1 = *((const unit_t **) k1);
499         const unit_t *u2 = *((const unit_t **) k2);
500         int i, o, u1_has_constr, u2_has_constr;
501         arch_register_req_t req;
502         const arch_env_t *aenv = u1->co->aenv;
503
504         /* Units with constraints come first */
505         u1_has_constr = 0;
506         for (i=0; i<u1->node_count; ++i) {
507                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
508                 if (arch_register_req_is(&req, limited)) {
509                         u1_has_constr = 1;
510                         break;
511                 }
512         }
513
514         u2_has_constr = 0;
515         for (i=0; i<u2->node_count; ++i) {
516                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
517                 if (arch_register_req_is(&req, limited)) {
518                         u2_has_constr = 1;
519                         break;
520                 }
521         }
522
523         if (u1_has_constr != u2_has_constr)
524                 return u2_has_constr - u1_has_constr;
525
526         /* Now check, whether the two units are connected */
527 #if 0
528         for (i=0; i<u1->node_count; ++i)
529                 for (o=0; o<u2->node_count; ++o)
530                         if (u1->nodes[i] == u2->nodes[o])
531                                 return 0;
532 #endif
533
534         /* After all, the sort key decides. Greater keys come first. */
535         return u2->sort_key - u1->sort_key;
536
537 }
538
539 /**
540  * Sort the ou's according to constraints and their sort_key
541  */
542 static void co_sort_units(copy_opt_t *co) {
543         int i, count = 0, costs;
544         unit_t *ou, **ous;
545
546         /* get the number of ous, remove them form the list and fill the array */
547         list_for_each_entry(unit_t, ou, &co->units, units)
548                 count++;
549         ous = alloca(count * sizeof(*ous));
550
551         costs = co_get_max_copy_costs(co);
552
553         i = 0;
554         list_for_each_entry(unit_t, ou, &co->units, units)
555                 ous[i++] = ou;
556
557         INIT_LIST_HEAD(&co->units);
558
559         assert(count == i && list_empty(&co->units));
560
561         for (i=0; i<count; ++i)
562                 ir_printf("%+F\n", ous[i]->nodes[0]);
563
564         qsort(ous, count, sizeof(*ous), compare_ous);
565
566         ir_printf("\n\n");
567         for (i=0; i<count; ++i)
568                 ir_printf("%+F\n", ous[i]->nodes[0]);
569
570         /* reinsert into list in correct order */
571         for (i=0; i<count; ++i)
572                 list_add_tail(&ous[i]->units, &co->units);
573
574         assert(costs == co_get_max_copy_costs(co));
575 }
576 #endif
577
578 void co_build_ou_structure(copy_opt_t *co) {
579         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
580         INIT_LIST_HEAD(&co->units);
581         irg_walk_graph(co->irg, co_collect_units, NULL, co);
582 #ifdef QUICK_AND_DIRTY_HACK
583         co_sort_units(co);
584 #endif
585 }
586
587 void co_free_ou_structure(copy_opt_t *co) {
588         unit_t *curr, *tmp;
589         ASSERT_OU_AVAIL(co);
590         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
591                 xfree(curr->nodes);
592                 xfree(curr->costs);
593                 xfree(curr);
594         }
595         co->units.next = NULL;
596 }
597
598 /* co_solve_heuristic() is implemented in becopyheur.c */
599
600 int co_get_max_copy_costs(const copy_opt_t *co) {
601         int i, res = 0;
602         unit_t *curr;
603
604         ASSERT_OU_AVAIL(co);
605
606         list_for_each_entry(unit_t, curr, &co->units, units) {
607                 res += curr->inevitable_costs;
608                 for (i=1; i<curr->node_count; ++i)
609                         res += curr->costs[i];
610         }
611         return res;
612 }
613
614 int co_get_inevit_copy_costs(const copy_opt_t *co) {
615         int res = 0;
616         unit_t *curr;
617
618         ASSERT_OU_AVAIL(co);
619
620         list_for_each_entry(unit_t, curr, &co->units, units)
621                 res += curr->inevitable_costs;
622         return res;
623 }
624
625 int co_get_copy_costs(const copy_opt_t *co) {
626         int i, res = 0;
627         unit_t *curr;
628
629         ASSERT_OU_AVAIL(co);
630
631         list_for_each_entry(unit_t, curr, &co->units, units) {
632                 int root_col = get_irn_col(co, curr->nodes[0]);
633                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
634                 res += curr->inevitable_costs;
635                 for (i=1; i<curr->node_count; ++i) {
636                         int arg_col = get_irn_col(co, curr->nodes[i]);
637                         if (root_col != arg_col) {
638                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
639                                 res += curr->costs[i];
640                         }
641                 }
642         }
643         return res;
644 }
645
646 int co_get_lower_bound(const copy_opt_t *co) {
647         int res = 0;
648         unit_t *curr;
649
650         ASSERT_OU_AVAIL(co);
651
652         list_for_each_entry(unit_t, curr, &co->units, units)
653                 res += curr->inevitable_costs + curr->min_nodes_costs;
654         return res;
655 }
656
657 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
658 {
659         bitset_t *seen = bitset_irg_malloc(co->irg);
660         affinity_node_t *an;
661
662         memset(stat, 0, sizeof(stat[0]));
663
664         /* count affinity edges. */
665         co_gs_foreach_aff_node(co, an) {
666                 neighb_t *neigh;
667                 stat->aff_nodes += 1;
668                 bitset_add_irn(seen, an->irn);
669                 co_gs_foreach_neighb(an, neigh) {
670                         if(!bitset_contains_irn(seen, neigh->irn)) {
671                                 stat->aff_edges += 1;
672                                 stat->max_costs += neigh->costs;
673
674                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
675                                         stat->costs += neigh->costs;
676                                         stat->unsatisfied_edges += 1;
677                                 }
678
679                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
680                                         stat->aff_int += 1;
681                                         stat->inevit_costs += neigh->costs;
682                                 }
683
684                         }
685                 }
686         }
687
688         bitset_free(seen);
689 }
690
691 /******************************************************************************
692    _____                 _        _____ _
693   / ____|               | |      / ____| |
694  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
695  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
696  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
697   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
698                   | |                                       __/ |
699                   |_|                                      |___/
700  ******************************************************************************/
701
702 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
703         const affinity_node_t *n1 = k1;
704         const affinity_node_t *n2 = k2;
705
706         return (n1->irn != n2->irn);
707 }
708
709 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
710         affinity_node_t new_node, *node;
711         neighb_t        *nbr;
712         int             allocnew = 1;
713
714         new_node.irn        = n1;
715         new_node.degree     = 0;
716         new_node.neighbours = NULL;
717         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
718
719         for (nbr = node->neighbours; nbr; nbr = nbr->next)
720                 if (nbr->irn == n2) {
721                         allocnew = 0;
722                         break;
723                 }
724
725         /* if we did not find n2 in n1's neighbourhood insert it */
726         if (allocnew) {
727                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
728                 nbr->irn   = n2;
729                 nbr->costs = 0;
730                 nbr->next  = node->neighbours;
731
732                 node->neighbours = nbr;
733                 node->degree++;
734         }
735
736         /* now nbr points to n1's neighbour-entry of n2 */
737         nbr->costs += costs;
738 }
739
740 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
741         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
742                 add_edge(co, n1, n2, costs);
743                 add_edge(co, n2, n1, costs);
744         }
745 }
746
747 static void build_graph_walker(ir_node *irn, void *env) {
748         copy_opt_t *co = env;
749         int pos, max;
750         const arch_register_t *reg;
751
752         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
753                 return;
754
755         reg = arch_get_irn_register(co->aenv, irn);
756         if (arch_register_type_is(reg, ignore))
757                 return;
758
759         /* Phis */
760         if (is_Reg_Phi(irn))
761                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
762                         ir_node *arg = get_irn_n(irn, pos);
763                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
764                 }
765
766         /* Perms */
767         else if (is_Perm_Proj(co->aenv, irn)) {
768                 ir_node *arg = get_Perm_src(irn);
769                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
770         }
771
772         /* 2-address code */
773         else {
774                 const arch_register_req_t *req =
775                         arch_get_register_req(co->aenv, irn, -1);
776                 if (is_2addr_code(req)) {
777                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
778                         if (! arch_irn_is(co->aenv, other, ignore))
779                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
780                 }
781         }
782 }
783
784 void co_build_graph_structure(copy_opt_t *co) {
785         obstack_init(&co->obst);
786         co->nodes = new_set(compare_affinity_node_t, 32);
787
788         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
789 }
790
791 void co_free_graph_structure(copy_opt_t *co) {
792         ASSERT_GS_AVAIL(co);
793
794         del_set(co->nodes);
795         obstack_free(&co->obst, NULL);
796         co->nodes = NULL;
797 }
798
799 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
800
801 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
802         affinity_node_t new_node, *n;
803
804         ASSERT_GS_AVAIL(co);
805
806         new_node.irn = irn;
807         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
808         if (n) {
809                 return (n->degree > 0);
810         } else
811                 return 0;
812 }
813
814 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
815 {
816         be_ifg_t *ifg  = co->cenv->ifg;
817         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
818
819         ir_node *irn;
820         void *it, *nit;
821         int i, n, n_regs;
822
823         n_regs = 0;
824         for(i = 0; i < co->cls->n_regs; ++i) {
825                 const arch_register_t *reg = &co->cls->regs[i];
826                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
827         }
828
829         /*
830          * n contains the first node number.
831          * the values below n are the pre-colored register nodes
832          */
833
834         it  = be_ifg_nodes_iter_alloca(ifg);
835         nit = be_ifg_neighbours_iter_alloca(ifg);
836
837         n = n_regs;
838         be_ifg_foreach_node(ifg, it, irn) {
839                 if(!arch_irn_is(co->aenv, irn, ignore))
840                         set_irn_link(irn, INT_TO_PTR(n++));
841         }
842
843         fprintf(f, "%d %d\n", n, n_regs);
844
845         be_ifg_foreach_node(ifg, it, irn) {
846                 if(!arch_irn_is(co->aenv, irn, ignore)) {
847                         int idx            = PTR_TO_INT(get_irn_link(irn));
848                         affinity_node_t *a = get_affinity_info(co, irn);
849
850                         const arch_register_req_t *req;
851                         ir_node *adj;
852
853                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
854                         if(arch_register_req_is(req, limited)) {
855                                 for(i = 0; i < co->cls->n_regs; ++i) {
856                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
857                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
858                                 }
859                         }
860
861
862                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
863                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
864                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
865                                         if(idx < adj_idx)
866                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
867                                 }
868                         }
869
870                         if(a) {
871                                 neighb_t *n;
872
873                                 co_gs_foreach_neighb(a, n) {
874                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
875                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
876                                                 if(idx < n_idx)
877                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
878                                         }
879                                 }
880                         }
881                 }
882         }
883 }
884
885 typedef struct _appel_clique_walker_t {
886         ir_phase ph;
887         const copy_opt_t *co;
888         int curr_nr;
889         int node_count;
890         FILE *f;
891         int dumb;
892         int *color_map;
893         struct obstack obst;
894 } appel_clique_walker_t;
895
896 typedef struct _appel_block_info_t {
897         int *live_end_nr;
898         int *live_in_nr;
899         int *phi_nr;
900         ir_node **live_end;
901         ir_node **live_in;
902         ir_node **phi;
903         int n_live_end;
904         int n_live_in;
905         int n_phi;
906 } appel_block_info_t;
907
908 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
909 {
910 #if 0
911         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
912         int res = (int) freq;
913         return res == 0 ? 1 : res;
914 #else
915         ir_loop *loop = get_irn_loop(bl);
916         if(loop) {
917                 int d = get_loop_depth(loop);
918                 return 1 + d * d;
919         }
920         return 1;
921 #endif
922 }
923
924 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
925 {
926         appel_block_info_t *res = NULL;
927
928         if(is_Block(irn)) {
929                 appel_clique_walker_t *d = (void *) phase;
930                 res = phase_alloc(phase, sizeof(res[0]));
931                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
932                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
933                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
934                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
935                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
936                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
937         }
938
939         return res;
940 }
941
942 typedef struct _insn_list_t {
943         be_insn_t *insn;
944         struct list_head list;
945 } insn_list_t;
946
947 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
948 {
949         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
950         int i;
951
952         for(i = 0; i < bli->n_live_end; ++i)
953                 if(bli->live_end[i] == irn)
954                         return bli->live_end_nr[i];
955
956         return -1;
957 }
958
959 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
960 {
961         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
962         ir_node *irn;
963         int n_live;
964         int j;
965
966         n_live = 0;
967         foreach_pset(live, irn)
968                 live_arr[n_live++] = irn;
969
970         /* dump the live after clique */
971         if(!env->dumb) {
972                 for(j = 0; j < n_live; ++j) {
973                         int k;
974
975                         for(k = j + 1; k < n_live; ++k) {
976                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
977                         }
978                         fprintf(env->f, "\n");
979                 }
980         }
981
982         /* dump the affinities */
983         for(j = 0; !env->dumb && j < n_live; ++j) {
984                 ir_node *irn = live_arr[j];
985                 int old_nr = PTR_TO_INT(get_irn_link(irn));
986
987                 /* if the node was already live in the last insn dump the affinity */
988                 if(old_nr > start_nr) {
989                         int weight = appel_aff_weight(env, bl);
990                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
991                 }
992         }
993
994         /* set the current numbers into the link field. */
995         for(j = 0; j < n_live; ++j) {
996                 ir_node *irn = live_arr[j];
997                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
998         }
999
1000         return curr_nr + n_live;
1001 }
1002
1003 static void appel_walker(ir_node *bl, void *data)
1004 {
1005         appel_clique_walker_t *env = data;
1006         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1007         struct obstack *obst       = &env->obst;
1008         void *base                 = obstack_base(obst);
1009         pset *live                 = pset_new_ptr_default();
1010         be_lv_t *lv                = env->co->cenv->birg->lv;
1011
1012         int n_insns  = 0;
1013         int n_nodes  = 0;
1014         int start_nr = env->curr_nr;
1015         int curr_nr  = start_nr;
1016
1017         be_insn_env_t insn_env;
1018         int i, j;
1019         ir_node *irn;
1020         be_insn_t **insns;
1021
1022         insn_env.aenv = env->co->aenv;
1023         insn_env.cls  = env->co->cls;
1024         insn_env.obst = obst;
1025         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1026
1027         /* Guess how many insns will be in this block. */
1028         sched_foreach(bl, irn)
1029                 n_nodes++;
1030
1031         bli->n_phi = 0;
1032         insns = malloc(n_nodes * sizeof(insns[0]));
1033
1034         /* Put all insns in an array. */
1035         irn = sched_first(bl);
1036         while(!sched_is_end(irn)) {
1037                 be_insn_t *insn;
1038                 insn = be_scan_insn(&insn_env, irn);
1039                 insns[n_insns++] = insn;
1040                 irn = insn->next_insn;
1041         }
1042
1043         DBG((dbg, LEVEL_2, "%+F\n", bl));
1044         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1045
1046         /* Generate the bad and ugly. */
1047         for(i = n_insns - 1; i >= 0; --i) {
1048                 be_insn_t *insn = insns[i];
1049
1050                 /* The first live set has to be saved in the block border set. */
1051                 if(i == n_insns - 1) {
1052                         j = 0;
1053                         foreach_pset(live, irn) {
1054                                 bli->live_end[j]    = irn;
1055                                 bli->live_end_nr[j] = curr_nr + j;
1056                                 ++j;
1057                         }
1058                         bli->n_live_end = j;
1059                 }
1060
1061                 if(!env->dumb) {
1062                         for(j = 0; j < insn->use_start; ++j) {
1063                                 ir_node *op   = insn->ops[j].carrier;
1064                                 bitset_t *adm = insn->ops[j].regs;
1065                                 int k;
1066                                 int nr;
1067
1068                                 if(!insn->ops[j].has_constraints)
1069                                         continue;
1070
1071                                 nr = 0;
1072                                 foreach_pset(live, irn) {
1073                                         if(irn == op) {
1074                                                 pset_break(live);
1075                                                 break;
1076                                         }
1077                                         ++nr;
1078                                 }
1079
1080                                 assert(nr < pset_count(live));
1081
1082                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1083                                         int mapped_col = env->color_map[k];
1084                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1085                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1086                                 }
1087                         }
1088                 }
1089
1090                 /* dump the clique and update the stuff. */
1091                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1092
1093                 /* remove all defs. */
1094                 for(j = 0; j < insn->use_start; ++j)
1095                         pset_remove_ptr(live, insn->ops[j].carrier);
1096
1097                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1098                         bli->phi[bli->n_phi]    = insn->irn;
1099                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1100                         bli->n_phi++;
1101                 }
1102
1103                 /* add all uses */
1104                 else
1105                         for(j = insn->use_start; j < insn->n_ops; ++j)
1106                                 pset_insert_ptr(live, insn->ops[j].carrier);
1107         }
1108
1109         /* print the start clique. */
1110         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1111
1112         i = 0;
1113         foreach_pset(live, irn) {
1114                 bli->live_in[i]    = irn;
1115                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1116                 ++i;
1117         }
1118         bli->n_live_in = i;
1119
1120         del_pset(live);
1121         free(insns);
1122         obstack_free(obst, base);
1123         env->curr_nr = curr_nr;
1124 }
1125
1126 static void appel_inter_block_aff(ir_node *bl, void *data)
1127 {
1128         appel_clique_walker_t *env = data;
1129         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1130
1131         int i, j, n;
1132
1133         for(i = 0; i < bli->n_live_in; ++i) {
1134                 ir_node *irn = bli->live_in[i];
1135
1136                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1137                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1138
1139                         int nr = appel_get_live_end_nr(env, pred, irn);
1140                         assert(nr >= 0);
1141                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1142                 }
1143         }
1144
1145         for(i = 0; i < bli->n_phi; ++i) {
1146                 ir_node *irn = bli->phi[i];
1147
1148                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1149                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1150                         ir_node *op = get_irn_n(irn, j);
1151
1152                         int nr = appel_get_live_end_nr(env, pred, op);
1153                         assert(nr >= 0);
1154                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1155                 }
1156         }
1157
1158 }
1159
1160 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1161 {
1162         int i;
1163         int n_colors;
1164         appel_clique_walker_t env;
1165         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1166         be_lv_t *lv = co->cenv->birg->lv;
1167
1168         be_liveness_recompute(lv);
1169         obstack_init(&env.obst);
1170         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1171         env.curr_nr = co->cls->n_regs;
1172         env.co = co;
1173         env.f = f;
1174
1175         bitset_copy(adm, co->cenv->ignore_colors);
1176         bitset_flip_all(adm);
1177
1178         /* Make color map. */
1179         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1180         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1181                 const arch_register_t *reg = &co->cls->regs[i];
1182                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1183         }
1184
1185         env.dumb = 1;
1186         env.curr_nr = n_colors;
1187         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1188         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1189
1190         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1191
1192         /* make the first k nodes interfere */
1193         for(i = 0; i < n_colors; ++i) {
1194                 int j;
1195                 for(j = i + 1; j < n_colors; ++j)
1196                         fprintf(f, "%d %d -1 ", i, j);
1197                 fprintf(f, "\n");
1198         }
1199
1200         env.dumb = 0;
1201         env.curr_nr = n_colors;
1202         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1203         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1204         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1205         obstack_free(&env.obst, NULL);
1206 }
1207
1208 /*
1209          ___ _____ ____   ____   ___ _____   ____                        _
1210         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1211          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1212          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1213         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1214                                                                   |_|            |___/
1215 */
1216
1217 static const char *get_dot_color_name(int col)
1218 {
1219         static const char *names[] = {
1220                 "blue",
1221                 "red",
1222                 "green",
1223                 "yellow",
1224                 "cyan",
1225                 "magenta",
1226                 "orange",
1227                 "chocolate",
1228                 "beige",
1229                 "navy",
1230                 "darkgreen",
1231                 "darkred",
1232                 "lightPink",
1233                 "chartreuse",
1234                 "lightskyblue",
1235                 "linen",
1236                 "pink",
1237                 "lightslateblue",
1238                 "mintcream",
1239                 "red",
1240                 "darkolivegreen",
1241                 "mediumblue",
1242                 "mistyrose",
1243                 "salmon",
1244                 "darkseagreen",
1245                 "mediumslateblue"
1246                 "moccasin",
1247                 "tomato",
1248                 "forestgreen",
1249                 "darkturquoise",
1250                 "palevioletred"
1251         };
1252
1253         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1254 }
1255
1256 typedef struct _co_ifg_dump_t {
1257         const copy_opt_t *co;
1258         unsigned flags;
1259 } co_ifg_dump_t;
1260
1261 static void ifg_dump_graph_attr(FILE *f, void *self)
1262 {
1263         fprintf(f, "overlap=scale");
1264 }
1265
1266 static int ifg_is_dump_node(void *self, ir_node *irn)
1267 {
1268         co_ifg_dump_t *cod = self;
1269         return !arch_irn_is(cod->co->aenv, irn, ignore);
1270 }
1271
1272 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1273 {
1274         co_ifg_dump_t *env         = self;
1275         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1276         const arch_register_req_t *req;
1277         int limited;
1278
1279         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1280         limited = arch_register_req_is(req, limited);
1281
1282         if(env->flags & CO_IFG_DUMP_LABELS) {
1283                 ir_fprintf(f, "label=\"%+F", irn);
1284
1285 #if 0
1286                 // TODO fix this...
1287                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1288                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1289                         req.limited(req.limited_env, bs);
1290                         ir_fprintf(f, "\\n%B", bs);
1291                 }
1292 #endif
1293                 ir_fprintf(f, "\" ");
1294         } else {
1295                 fprintf(f, "label=\"\" shape=point " );
1296         }
1297
1298         if(env->flags & CO_IFG_DUMP_SHAPE)
1299                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1300
1301         if(env->flags & CO_IFG_DUMP_COLORS)
1302                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1303 }
1304
1305 static void ifg_dump_at_end(FILE *file, void *self)
1306 {
1307         co_ifg_dump_t *env = self;
1308         affinity_node_t *a;
1309
1310         co_gs_foreach_aff_node(env->co, a) {
1311                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1312                 unsigned aidx = get_irn_idx(a->irn);
1313                 neighb_t *n;
1314
1315                 co_gs_foreach_neighb(a, n) {
1316                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1317                         unsigned nidx = get_irn_idx(n->irn);
1318
1319                         if(aidx < nidx) {
1320                                 const char *color = nr == ar ? "blue" : "red";
1321                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1322                                 if(env->flags & CO_IFG_DUMP_LABELS)
1323                                         fprintf(file, "label=\"%d\" ", n->costs);
1324                                 if(env->flags & CO_IFG_DUMP_COLORS)
1325                                         fprintf(file, "color=%s ", color);
1326                                 else
1327                                         fprintf(file, "style=dotted");
1328                                 fprintf(file, "];\n");
1329                         }
1330                 }
1331         }
1332 }
1333
1334
1335 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1336         ifg_is_dump_node,
1337         ifg_dump_graph_attr,
1338         ifg_dump_node_attr,
1339         NULL,
1340         NULL,
1341         ifg_dump_at_end
1342 };
1343
1344
1345
1346 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1347 {
1348         co_ifg_dump_t cod;
1349
1350         cod.co    = co;
1351         cod.flags = flags;
1352         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1353 }
1354
1355
1356 void co_solve_park_moon(copy_opt_t *opt)
1357 {
1358
1359 }
1360
1361 static int void_algo(copy_opt_t *co)
1362 {
1363         return 0;
1364 }
1365
1366 /*
1367                 _    _                  _ _   _
1368            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1369           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1370          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1371         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1372                            |___/
1373 */
1374
1375 typedef struct {
1376         co_algo_t  *algo;
1377         const char *name;
1378         int        can_improve_existing;
1379 } co_algo_info_t;
1380
1381 static co_algo_info_t algos[] = {
1382         { void_algo,               "none",  0 },
1383         { co_solve_heuristic,      "heur1", 0 },
1384         { co_solve_heuristic_new,  "heur2", 0 },
1385         { co_solve_heuristic_java, "heur3", 0 },
1386         { co_solve_heuristic_mst,  "heur4", 0 },
1387 #ifdef WITH_ILP
1388         { co_solve_ilp2,           "ilp",   1 },
1389 #endif
1390         { NULL,                    "",      0 }
1391 };
1392
1393 /*
1394     __  __       _         ____       _
1395    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1396    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1397    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1398    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1399
1400 */
1401
1402 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1403 {
1404         FILE *result;
1405         char buf[1024];
1406
1407         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1408         result = fopen(buf, "wt");
1409         if(result == NULL) {
1410                 panic("Couldn't open '%s' for writing.", buf);
1411         }
1412
1413         return result;
1414 }
1415
1416 void co_driver(be_chordal_env_t *cenv)
1417 {
1418         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1419         co_complete_stats_t before, after;
1420         copy_opt_t          *co;
1421         co_algo_t           *algo_func;
1422         int                 was_optimal = 0;
1423
1424         if (algo < 0 || algo >= CO_ALGO_LAST)
1425                 return;
1426
1427         co = new_copy_opt(cenv, cost_func);
1428         co_build_ou_structure(co);
1429         co_build_graph_structure(co);
1430
1431         co_complete_stats(co, &before);
1432
1433         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1434         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1435         be_stat_ev_ull("co_max_costs",    before.max_costs);
1436         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1437         be_stat_ev_ull("co_aff_int",      before.aff_int);
1438
1439         be_stat_ev_ull("co_init_costs",   before.costs);
1440         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1441
1442         /* Dump the interference graph in Appel's format. */
1443         if (dump_flags & DUMP_APPEL) {
1444                 FILE *f = my_open(cenv, "", ".apl");
1445                 co_dump_appel_graph(co, f);
1446                 fclose(f);
1447         }
1448
1449         if (dump_flags & DUMP_BEFORE) {
1450                 FILE *f = my_open(cenv, "", "-before.dot");
1451                 co_dump_ifg_dot(co, f, style_flags);
1452                 fclose(f);
1453         }
1454
1455         /* if the algo can improve results, provide an initial solution with heur3 */
1456         if (improve && algos[algo].can_improve_existing) {
1457                 co_complete_stats_t stats;
1458
1459                 /* produce a heuristic solution */
1460 #ifdef WITH_JVM
1461                 co_solve_heuristic_java(co);
1462 #else
1463                 co_solve_heuristic(co);
1464 #endif /* WITH_JVM */
1465
1466                 /* do the stats and provide the current costs */
1467                 co_complete_stats(co, &stats);
1468                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1469         }
1470
1471 #ifdef WITH_JVM
1472         /* start the JVM here so that it does not tamper the timing. */
1473         if (algo == CO_ALGO_HEUR3)
1474                 be_java_coal_start_jvm();
1475 #endif /* WITH_JVM */
1476
1477         algo_func = algos[algo].algo;
1478
1479         /* perform actual copy minimization */
1480         lc_timer_reset_and_start(timer);
1481         was_optimal = algo_func(co);
1482         lc_timer_stop(timer);
1483
1484         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1485         be_stat_ev_ull("co_optimal", was_optimal);
1486
1487         if (dump_flags & DUMP_AFTER) {
1488                 FILE *f = my_open(cenv, "", "-after.dot");
1489                 co_dump_ifg_dot(co, f, style_flags);
1490                 fclose(f);
1491         }
1492
1493         co_complete_stats(co, &after);
1494
1495         if (do_stats) {
1496                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1497                 ulong64 evitable          = after.costs     - after.inevit_costs;
1498
1499                 ir_printf("%30F ", cenv->irg);
1500                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1501
1502                 if(optimizable_costs > 0)
1503                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1504                 else
1505                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1506         }
1507
1508         be_stat_ev_ull("co_after_costs", after.costs);
1509         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1510
1511         co_free_graph_structure(co);
1512         co_free_ou_structure(co);
1513         free_copy_opt(co);
1514 }