9a7540f82c577c1b9d32a49a0732d13653de0119
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #define DUMP_BEFORE 1
70 #define DUMP_AFTER  2
71 #define DUMP_APPEL  4
72 #define DUMP_ALL    2 * DUMP_APPEL - 1
73
74 #define COST_FUNC_FREQ     1
75 #define COST_FUNC_LOOP     2
76 #define COST_FUNC_ALL_ONE  3
77
78 static unsigned   dump_flags  = 0;
79 static unsigned   style_flags = 0;
80 static unsigned   do_stats    = 0;
81 static cost_fct_t cost_func   = co_get_costs_exec_freq;
82 static unsigned   algo        = CO_ALGO_HEUR4;
83 static int        improve     = 1;
84
85 #ifdef WITH_LIBCORE
86 #include <libcore/lc_timing.h>
87 #include <libcore/lc_opts.h>
88 #include <libcore/lc_opts_enum.h>
89
90 static const lc_opt_enum_mask_items_t dump_items[] = {
91         { "before",  DUMP_BEFORE },
92         { "after",   DUMP_AFTER  },
93         { "appel",   DUMP_APPEL  },
94         { "all",     DUMP_ALL    },
95         { NULL,      0 }
96 };
97
98 static const lc_opt_enum_mask_items_t style_items[] = {
99         { "color",   CO_IFG_DUMP_COLORS },
100         { "labels",  CO_IFG_DUMP_LABELS },
101         { "constr",  CO_IFG_DUMP_CONSTR },
102         { "shape",   CO_IFG_DUMP_SHAPE  },
103         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
104         { NULL,      0 }
105 };
106
107 static const lc_opt_enum_mask_items_t algo_items[] = {
108         { "none",   CO_ALGO_NONE  },
109         { "heur",   CO_ALGO_HEUR  },
110         { "heur2",  CO_ALGO_HEUR2 },
111 #ifdef WITH_JVM
112         { "heur3",  CO_ALGO_HEUR3 },
113 #endif /* WITH_JVM */
114         { "heur4",  CO_ALGO_HEUR4 },
115 #ifdef WITH_ILP
116         { "ilp",    CO_ALGO_ILP   },
117 #endif /* WITH_ILP */
118         { NULL,     0 }
119 };
120
121 typedef int (*opt_funcptr)(void);
122
123 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
124         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
125         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
126         { "one",    (opt_funcptr) co_get_costs_all_one },
127         { NULL,     NULL }
128 };
129
130 static lc_opt_enum_mask_var_t dump_var = {
131         &dump_flags, dump_items
132 };
133
134 static lc_opt_enum_mask_var_t style_var = {
135         &style_flags, style_items
136 };
137
138 static lc_opt_enum_mask_var_t algo_var = {
139         &algo, algo_items
140 };
141
142 static lc_opt_enum_func_ptr_var_t cost_func_var = {
143         (opt_funcptr*) &cost_func, cost_func_items
144 };
145
146 static const lc_opt_table_entry_t options[] = {
147         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
148         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
149         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
150         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
151         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
152         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
153         LC_OPT_LAST
154 };
155 #endif /* WITH_LIBCORE */
156
157 /* Insert additional options registration functions here. */
158 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
159 extern void be_co2_register_options(lc_opt_entry_t *grp);
160 extern void be_co3_register_options(lc_opt_entry_t *grp);
161
162 void be_init_copycoal(void)
163 {
164 #ifdef WITH_LIBCORE
165         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
166         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
167         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
168         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
169
170         lc_opt_add_table(co_grp, options);
171 #endif
172 }
173
174 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
175
176 #undef QUICK_AND_DIRTY_HACK
177
178 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
179 {
180         if (env->ifg)
181                 return be_ifg_connected(env->ifg, a, b);
182         else
183                 return values_interfere(env->birg, a, b);
184 }
185
186
187 /******************************************************************************
188     _____                           _
189    / ____|                         | |
190   | |  __  ___ _ __   ___ _ __ __ _| |
191   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
192   | |__| |  __/ | | |  __/ | | (_| | |
193    \_____|\___|_| |_|\___|_|  \__,_|_|
194
195  ******************************************************************************/
196
197 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
198
199
200 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
201 {
202         const char *s1, *s2, *s3;
203         int len;
204         copy_opt_t *co;
205
206         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
207
208         co = xcalloc(1, sizeof(*co));
209         co->cenv      = chordal_env;
210         co->aenv      = chordal_env->birg->main_env->arch_env;
211         co->irg       = chordal_env->irg;
212         co->cls       = chordal_env->cls;
213         co->get_costs = get_costs;
214
215         s1 = get_irp_prog_name();
216         s2 = get_entity_name(get_irg_entity(co->irg));
217         s3 = chordal_env->cls->name;
218         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
219         co->name = xmalloc(len);
220         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
221
222         return co;
223 }
224
225 void free_copy_opt(copy_opt_t *co) {
226         xfree(co->name);
227         free(co);
228 }
229
230 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
231         const arch_register_req_t *req;
232         const arch_register_t *reg;
233
234         if (arch_irn_is(co->aenv, irn, ignore))
235                 return 0;
236
237         reg = arch_get_irn_register(co->aenv, irn);
238         if (arch_register_type_is(reg, ignore))
239                 return 0;
240
241         req = arch_get_register_req(co->aenv, irn, -1);
242         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
243                 return 1;
244
245         return 0;
246 }
247
248 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
249         int cost = 0;
250         ir_loop *loop;
251         ir_node *root_block = get_nodes_block(root);
252         (void) co;
253         (void) arg;
254
255         if (is_Phi(root)) {
256                 /* for phis the copies are placed in the corresponding pred-block */
257                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
258         } else {
259                 /* a perm places the copy in the same block as it resides */
260                 loop = get_irn_loop(root_block);
261         }
262         if (loop) {
263                 int d = get_loop_depth(loop);
264                 cost = d*d;
265         }
266         return 1+cost;
267 }
268
269 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
270         int res;
271         ir_node *root_bl = get_nodes_block(root);
272         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
273         (void) arg;
274         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
275
276         /* don't allow values smaller than one. */
277         return res < 1 ? 1 : res;
278 }
279
280
281 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
282         (void) co;
283         (void) root;
284         (void) arg;
285         (void) pos;
286         return 1;
287 }
288
289 /******************************************************************************
290    ____        _   _    _       _ _          _____ _
291   / __ \      | | | |  | |     (_) |        / ____| |
292  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
293  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
294  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
295   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
296         | |                                                            __/ |
297         |_|                                                           |___/
298  ******************************************************************************/
299
300 /**
301  * Determines a maximum weighted independent set with respect to
302  * the interference and conflict edges of all nodes in a qnode.
303  */
304 static int ou_max_ind_set_costs(unit_t *ou) {
305         be_chordal_env_t *chordal_env = ou->co->cenv;
306         ir_node **safe, **unsafe;
307         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
308         bitset_t *curr;
309         bitset_pos_t pos;
310         int max, curr_weight, best_weight = 0;
311
312         /* assign the nodes into two groups.
313          * safe: node has no interference, hence it is in every max stable set.
314          * unsafe: node has an interference
315          */
316         safe = alloca((ou->node_count-1) * sizeof(*safe));
317         safe_costs = 0;
318         safe_count = 0;
319         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
320         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
321         unsafe_count = 0;
322         for(i=1; i<ou->node_count; ++i) {
323                 int is_safe = 1;
324                 for(o=1; o<ou->node_count; ++o) {
325                         if (i==o)
326                                 continue;
327                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
328                                 unsafe_costs[unsafe_count] = ou->costs[i];
329                                 unsafe[unsafe_count] = ou->nodes[i];
330                                 ++unsafe_count;
331                                 is_safe = 0;
332                                 break;
333                         }
334                 }
335                 if (is_safe) {
336                         safe_costs += ou->costs[i];
337                         safe[safe_count++] = ou->nodes[i];
338                 }
339         }
340
341
342         /* now compute the best set out of the unsafe nodes*/
343         if (unsafe_count > MIS_HEUR_TRIGGER) {
344                 bitset_t *best = bitset_alloca(unsafe_count);
345                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
346                 for (i=0; i<unsafe_count; ++i) {
347                         bitset_set(best, i);
348                         /* check if it is a stable set */
349                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
350                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
351                                         bitset_clear(best, i); /* clear the bit and try next one */
352                                         break;
353                                 }
354                 }
355                 /* compute the weight */
356                 bitset_foreach(best, pos)
357                         best_weight += unsafe_costs[pos];
358         } else {
359                 /* Exact Algorithm: Brute force */
360                 curr = bitset_alloca(unsafe_count);
361                 bitset_set_all(curr);
362                 while ((max = bitset_popcnt(curr)) != 0) {
363                         /* check if curr is a stable set */
364                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
365                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
366                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
367                                                         goto no_stable_set;
368
369                         /* if we arrive here, we have a stable set */
370                         /* compute the weigth of the stable set*/
371                         curr_weight = 0;
372                         bitset_foreach(curr, pos)
373                                 curr_weight += unsafe_costs[pos];
374
375                         /* any better ? */
376                         if (curr_weight > best_weight) {
377                                 best_weight = curr_weight;
378                         }
379
380         no_stable_set:
381                         bitset_minus1(curr);
382                 }
383         }
384
385         return safe_costs+best_weight;
386 }
387
388 static void co_collect_units(ir_node *irn, void *env) {
389         copy_opt_t *co = env;
390         unit_t *unit;
391
392         if (!is_curr_reg_class(co, irn))
393                 return;
394         if (!co_is_optimizable_root(co, irn))
395                 return;
396
397         /* Init a new unit */
398         unit = xcalloc(1, sizeof(*unit));
399         unit->co = co;
400         unit->node_count = 1;
401         INIT_LIST_HEAD(&unit->queue);
402
403         /* Phi with some/all of its arguments */
404         if (is_Reg_Phi(irn)) {
405                 int i, arity;
406
407                 /* init */
408                 arity = get_irn_arity(irn);
409                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
410                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
411                 unit->nodes[0] = irn;
412
413                 /* fill */
414                 for (i=0; i<arity; ++i) {
415                         int o, arg_pos;
416                         ir_node *arg = get_irn_n(irn, i);
417
418                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
419                         if (arg == irn)
420                                 continue;
421                         if (nodes_interfere(co->cenv, irn, arg)) {
422                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
423                                 continue;
424                         }
425
426                         /* Else insert the argument of the phi to the members of this ou */
427                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
428
429                         if (! arch_irn_is(co->aenv, arg, ignore)) {
430                                 /* Check if arg has occurred at a prior position in the arg/list */
431                                 arg_pos = 0;
432                                 for (o=1; o<unit->node_count; ++o) {
433                                         if (unit->nodes[o] == arg) {
434                                                 arg_pos = o;
435                                                 break;
436                                         }
437                                 }
438
439                                 if (!arg_pos) { /* a new argument */
440                                         /* insert node, set costs */
441                                         unit->nodes[unit->node_count] = arg;
442                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
443                                         unit->node_count++;
444                                 } else { /* arg has occurred before in same phi */
445                                         /* increase costs for existing arg */
446                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
447                                 }
448                         }
449                 }
450                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
451                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
452         } else
453
454         /* Proj of a perm with corresponding arg */
455         if (is_Perm_Proj(co->aenv, irn)) {
456                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
457                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
458                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
459                 unit->node_count = 2;
460                 unit->nodes[0] = irn;
461                 unit->nodes[1] = get_Perm_src(irn);
462                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
463         } else {
464                 const arch_register_req_t *req =
465                         arch_get_register_req(co->aenv, irn, -1);
466
467                 /* Src == Tgt of a 2-addr-code instruction */
468                 if (is_2addr_code(req)) {
469                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
470                         if (!arch_irn_is(co->aenv, other, ignore) &&
471                                         !nodes_interfere(co->cenv, irn, other)) {
472                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
473                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
474                                 unit->node_count = 2;
475                                 unit->nodes[0] = irn;
476                                 unit->nodes[1] = other;
477                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
478                         }
479                 } else {
480                         assert(0 && "This is not an optimizable node!");
481                 }
482         }
483
484         /* Insert the new unit at a position according to its costs */
485         if (unit->node_count > 1) {
486                 int i;
487                 struct list_head *tmp;
488
489                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
490                 for(i=1; i<unit->node_count; ++i) {
491                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
492                         unit->all_nodes_costs += unit->costs[i];
493                 }
494
495                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
496                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
497                 /* Insert the new ou according to its sort_key */
498                 tmp = &co->units;
499                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
500                         tmp = tmp->next;
501                 list_add(&unit->units, tmp);
502         } else {
503                 free(unit);
504         }
505 }
506
507 #ifdef QUICK_AND_DIRTY_HACK
508
509 static int compare_ous(const void *k1, const void *k2) {
510         const unit_t *u1 = *((const unit_t **) k1);
511         const unit_t *u2 = *((const unit_t **) k2);
512         int i, o, u1_has_constr, u2_has_constr;
513         arch_register_req_t req;
514         const arch_env_t *aenv = u1->co->aenv;
515
516         /* Units with constraints come first */
517         u1_has_constr = 0;
518         for (i=0; i<u1->node_count; ++i) {
519                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
520                 if (arch_register_req_is(&req, limited)) {
521                         u1_has_constr = 1;
522                         break;
523                 }
524         }
525
526         u2_has_constr = 0;
527         for (i=0; i<u2->node_count; ++i) {
528                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
529                 if (arch_register_req_is(&req, limited)) {
530                         u2_has_constr = 1;
531                         break;
532                 }
533         }
534
535         if (u1_has_constr != u2_has_constr)
536                 return u2_has_constr - u1_has_constr;
537
538         /* Now check, whether the two units are connected */
539 #if 0
540         for (i=0; i<u1->node_count; ++i)
541                 for (o=0; o<u2->node_count; ++o)
542                         if (u1->nodes[i] == u2->nodes[o])
543                                 return 0;
544 #endif
545
546         /* After all, the sort key decides. Greater keys come first. */
547         return u2->sort_key - u1->sort_key;
548
549 }
550
551 /**
552  * Sort the ou's according to constraints and their sort_key
553  */
554 static void co_sort_units(copy_opt_t *co) {
555         int i, count = 0, costs;
556         unit_t *ou, **ous;
557
558         /* get the number of ous, remove them form the list and fill the array */
559         list_for_each_entry(unit_t, ou, &co->units, units)
560                 count++;
561         ous = alloca(count * sizeof(*ous));
562
563         costs = co_get_max_copy_costs(co);
564
565         i = 0;
566         list_for_each_entry(unit_t, ou, &co->units, units)
567                 ous[i++] = ou;
568
569         INIT_LIST_HEAD(&co->units);
570
571         assert(count == i && list_empty(&co->units));
572
573         for (i=0; i<count; ++i)
574                 ir_printf("%+F\n", ous[i]->nodes[0]);
575
576         qsort(ous, count, sizeof(*ous), compare_ous);
577
578         ir_printf("\n\n");
579         for (i=0; i<count; ++i)
580                 ir_printf("%+F\n", ous[i]->nodes[0]);
581
582         /* reinsert into list in correct order */
583         for (i=0; i<count; ++i)
584                 list_add_tail(&ous[i]->units, &co->units);
585
586         assert(costs == co_get_max_copy_costs(co));
587 }
588 #endif
589
590 void co_build_ou_structure(copy_opt_t *co) {
591         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
592         INIT_LIST_HEAD(&co->units);
593         irg_walk_graph(co->irg, co_collect_units, NULL, co);
594 #ifdef QUICK_AND_DIRTY_HACK
595         co_sort_units(co);
596 #endif
597 }
598
599 void co_free_ou_structure(copy_opt_t *co) {
600         unit_t *curr, *tmp;
601         ASSERT_OU_AVAIL(co);
602         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
603                 xfree(curr->nodes);
604                 xfree(curr->costs);
605                 xfree(curr);
606         }
607         co->units.next = NULL;
608 }
609
610 /* co_solve_heuristic() is implemented in becopyheur.c */
611
612 int co_get_max_copy_costs(const copy_opt_t *co) {
613         int i, res = 0;
614         unit_t *curr;
615
616         ASSERT_OU_AVAIL(co);
617
618         list_for_each_entry(unit_t, curr, &co->units, units) {
619                 res += curr->inevitable_costs;
620                 for (i=1; i<curr->node_count; ++i)
621                         res += curr->costs[i];
622         }
623         return res;
624 }
625
626 int co_get_inevit_copy_costs(const copy_opt_t *co) {
627         int res = 0;
628         unit_t *curr;
629
630         ASSERT_OU_AVAIL(co);
631
632         list_for_each_entry(unit_t, curr, &co->units, units)
633                 res += curr->inevitable_costs;
634         return res;
635 }
636
637 int co_get_copy_costs(const copy_opt_t *co) {
638         int i, res = 0;
639         unit_t *curr;
640
641         ASSERT_OU_AVAIL(co);
642
643         list_for_each_entry(unit_t, curr, &co->units, units) {
644                 int root_col = get_irn_col(co, curr->nodes[0]);
645                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
646                 res += curr->inevitable_costs;
647                 for (i=1; i<curr->node_count; ++i) {
648                         int arg_col = get_irn_col(co, curr->nodes[i]);
649                         if (root_col != arg_col) {
650                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
651                                 res += curr->costs[i];
652                         }
653                 }
654         }
655         return res;
656 }
657
658 int co_get_lower_bound(const copy_opt_t *co) {
659         int res = 0;
660         unit_t *curr;
661
662         ASSERT_OU_AVAIL(co);
663
664         list_for_each_entry(unit_t, curr, &co->units, units)
665                 res += curr->inevitable_costs + curr->min_nodes_costs;
666         return res;
667 }
668
669 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
670 {
671         bitset_t *seen = bitset_irg_malloc(co->irg);
672         affinity_node_t *an;
673
674         memset(stat, 0, sizeof(stat[0]));
675
676         /* count affinity edges. */
677         co_gs_foreach_aff_node(co, an) {
678                 neighb_t *neigh;
679                 stat->aff_nodes += 1;
680                 bitset_add_irn(seen, an->irn);
681                 co_gs_foreach_neighb(an, neigh) {
682                         if(!bitset_contains_irn(seen, neigh->irn)) {
683                                 stat->aff_edges += 1;
684                                 stat->max_costs += neigh->costs;
685
686                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
687                                         stat->costs += neigh->costs;
688                                         stat->unsatisfied_edges += 1;
689                                 }
690
691                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
692                                         stat->aff_int += 1;
693                                         stat->inevit_costs += neigh->costs;
694                                 }
695
696                         }
697                 }
698         }
699
700         bitset_free(seen);
701 }
702
703 /******************************************************************************
704    _____                 _        _____ _
705   / ____|               | |      / ____| |
706  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
707  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
708  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
709   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
710                   | |                                       __/ |
711                   |_|                                      |___/
712  ******************************************************************************/
713
714 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
715         const affinity_node_t *n1 = k1;
716         const affinity_node_t *n2 = k2;
717         (void) size;
718
719         return (n1->irn != n2->irn);
720 }
721
722 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
723         affinity_node_t new_node, *node;
724         neighb_t        *nbr;
725         int             allocnew = 1;
726
727         new_node.irn        = n1;
728         new_node.degree     = 0;
729         new_node.neighbours = NULL;
730         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
731
732         for (nbr = node->neighbours; nbr; nbr = nbr->next)
733                 if (nbr->irn == n2) {
734                         allocnew = 0;
735                         break;
736                 }
737
738         /* if we did not find n2 in n1's neighbourhood insert it */
739         if (allocnew) {
740                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
741                 nbr->irn   = n2;
742                 nbr->costs = 0;
743                 nbr->next  = node->neighbours;
744
745                 node->neighbours = nbr;
746                 node->degree++;
747         }
748
749         /* now nbr points to n1's neighbour-entry of n2 */
750         nbr->costs += costs;
751 }
752
753 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
754         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
755                 add_edge(co, n1, n2, costs);
756                 add_edge(co, n2, n1, costs);
757         }
758 }
759
760 static void build_graph_walker(ir_node *irn, void *env) {
761         copy_opt_t *co = env;
762         int pos, max;
763         const arch_register_t *reg;
764
765         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
766                 return;
767
768         reg = arch_get_irn_register(co->aenv, irn);
769         if (arch_register_type_is(reg, ignore))
770                 return;
771
772         /* Phis */
773         if (is_Reg_Phi(irn))
774                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
775                         ir_node *arg = get_irn_n(irn, pos);
776                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
777                 }
778
779         /* Perms */
780         else if (is_Perm_Proj(co->aenv, irn)) {
781                 ir_node *arg = get_Perm_src(irn);
782                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
783         }
784
785         /* 2-address code */
786         else {
787                 const arch_register_req_t *req =
788                         arch_get_register_req(co->aenv, irn, -1);
789                 if (is_2addr_code(req)) {
790                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
791                         if (! arch_irn_is(co->aenv, other, ignore))
792                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
793                 }
794         }
795 }
796
797 void co_build_graph_structure(copy_opt_t *co) {
798         obstack_init(&co->obst);
799         co->nodes = new_set(compare_affinity_node_t, 32);
800
801         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
802 }
803
804 void co_free_graph_structure(copy_opt_t *co) {
805         ASSERT_GS_AVAIL(co);
806
807         del_set(co->nodes);
808         obstack_free(&co->obst, NULL);
809         co->nodes = NULL;
810 }
811
812 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
813
814 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
815         affinity_node_t new_node, *n;
816
817         ASSERT_GS_AVAIL(co);
818
819         new_node.irn = irn;
820         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
821         if (n) {
822                 return (n->degree > 0);
823         } else
824                 return 0;
825 }
826
827 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
828 {
829         be_ifg_t *ifg  = co->cenv->ifg;
830         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
831
832         ir_node *irn;
833         void *it, *nit;
834         int i, n, n_regs;
835
836         n_regs = 0;
837         for(i = 0; i < co->cls->n_regs; ++i) {
838                 const arch_register_t *reg = &co->cls->regs[i];
839                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
840         }
841
842         /*
843          * n contains the first node number.
844          * the values below n are the pre-colored register nodes
845          */
846
847         it  = be_ifg_nodes_iter_alloca(ifg);
848         nit = be_ifg_neighbours_iter_alloca(ifg);
849
850         n = n_regs;
851         be_ifg_foreach_node(ifg, it, irn) {
852                 if(!arch_irn_is(co->aenv, irn, ignore))
853                         set_irn_link(irn, INT_TO_PTR(n++));
854         }
855
856         fprintf(f, "%d %d\n", n, n_regs);
857
858         be_ifg_foreach_node(ifg, it, irn) {
859                 if(!arch_irn_is(co->aenv, irn, ignore)) {
860                         int idx            = PTR_TO_INT(get_irn_link(irn));
861                         affinity_node_t *a = get_affinity_info(co, irn);
862
863                         const arch_register_req_t *req;
864                         ir_node *adj;
865
866                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
867                         if(arch_register_req_is(req, limited)) {
868                                 for(i = 0; i < co->cls->n_regs; ++i) {
869                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
870                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
871                                 }
872                         }
873
874
875                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
876                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
877                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
878                                         if(idx < adj_idx)
879                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
880                                 }
881                         }
882
883                         if(a) {
884                                 neighb_t *n;
885
886                                 co_gs_foreach_neighb(a, n) {
887                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
888                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
889                                                 if(idx < n_idx)
890                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
891                                         }
892                                 }
893                         }
894                 }
895         }
896 }
897
898 typedef struct _appel_clique_walker_t {
899         ir_phase ph;
900         const copy_opt_t *co;
901         int curr_nr;
902         int node_count;
903         FILE *f;
904         int dumb;
905         int *color_map;
906         struct obstack obst;
907 } appel_clique_walker_t;
908
909 typedef struct _appel_block_info_t {
910         int *live_end_nr;
911         int *live_in_nr;
912         int *phi_nr;
913         ir_node **live_end;
914         ir_node **live_in;
915         ir_node **phi;
916         int n_live_end;
917         int n_live_in;
918         int n_phi;
919 } appel_block_info_t;
920
921 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
922 {
923 #if 0
924         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
925         int res = (int) freq;
926         return res == 0 ? 1 : res;
927 #else
928         ir_loop *loop = get_irn_loop(bl);
929         (void) env;
930         if(loop) {
931                 int d = get_loop_depth(loop);
932                 return 1 + d * d;
933         }
934         return 1;
935 #endif
936 }
937
938 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
939 {
940         appel_block_info_t *res = NULL;
941         (void) old;
942
943         if(is_Block(irn)) {
944                 appel_clique_walker_t *d = (void *) phase;
945                 res = phase_alloc(phase, sizeof(res[0]));
946                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
947                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
948                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
949                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
950                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
951                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
952         }
953
954         return res;
955 }
956
957 typedef struct _insn_list_t {
958         be_insn_t *insn;
959         struct list_head list;
960 } insn_list_t;
961
962 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
963 {
964         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
965         int i;
966
967         for(i = 0; i < bli->n_live_end; ++i)
968                 if(bli->live_end[i] == irn)
969                         return bli->live_end_nr[i];
970
971         return -1;
972 }
973
974 static int appel_dump_clique(appel_clique_walker_t *env, const ir_nodeset_t *live, ir_node *bl, int curr_nr, int start_nr)
975 {
976         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
977         ir_node *irn;
978         int n_live;
979         int j;
980         ir_nodeset_iterator_t iter;
981
982         n_live = 0;
983         foreach_ir_nodeset(live, irn, iter)
984                 live_arr[n_live++] = irn;
985
986         /* dump the live after clique */
987         if(!env->dumb) {
988                 for(j = 0; j < n_live; ++j) {
989                         int k;
990
991                         for(k = j + 1; k < n_live; ++k) {
992                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
993                         }
994                         fprintf(env->f, "\n");
995                 }
996         }
997
998         /* dump the affinities */
999         for(j = 0; !env->dumb && j < n_live; ++j) {
1000                 ir_node *irn = live_arr[j];
1001                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1002
1003                 /* if the node was already live in the last insn dump the affinity */
1004                 if(old_nr > start_nr) {
1005                         int weight = appel_aff_weight(env, bl);
1006                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1007                 }
1008         }
1009
1010         /* set the current numbers into the link field. */
1011         for(j = 0; j < n_live; ++j) {
1012                 ir_node *irn = live_arr[j];
1013                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1014         }
1015
1016         return curr_nr + n_live;
1017 }
1018
1019 static void appel_walker(ir_node *bl, void *data)
1020 {
1021         appel_clique_walker_t *env = data;
1022         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1023         struct obstack *obst       = &env->obst;
1024         void *base                 = obstack_base(obst);
1025         ir_nodeset_t live;
1026         ir_nodeset_iterator_t iter;
1027         be_lv_t *lv                = env->co->cenv->birg->lv;
1028
1029         int n_insns  = 0;
1030         int n_nodes  = 0;
1031         int start_nr = env->curr_nr;
1032         int curr_nr  = start_nr;
1033
1034         be_insn_env_t insn_env;
1035         int i, j;
1036         ir_node *irn;
1037         be_insn_t **insns;
1038
1039         insn_env.aenv = env->co->aenv;
1040         insn_env.cls  = env->co->cls;
1041         insn_env.obst = obst;
1042         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1043
1044         /* Guess how many insns will be in this block. */
1045         sched_foreach(bl, irn)
1046                 n_nodes++;
1047
1048         bli->n_phi = 0;
1049         insns = xmalloc(n_nodes * sizeof(insns[0]));
1050
1051         /* Put all insns in an array. */
1052         irn = sched_first(bl);
1053         while(!sched_is_end(irn)) {
1054                 be_insn_t *insn;
1055                 insn = be_scan_insn(&insn_env, irn);
1056                 insns[n_insns++] = insn;
1057                 irn = insn->next_insn;
1058         }
1059
1060         DBG((dbg, LEVEL_2, "%+F\n", bl));
1061         ir_nodeset_init(&live);
1062         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, &live);
1063
1064         /* Generate the bad and ugly. */
1065         for(i = n_insns - 1; i >= 0; --i) {
1066                 be_insn_t *insn = insns[i];
1067
1068                 /* The first live set has to be saved in the block border set. */
1069                 if(i == n_insns - 1) {
1070                         j = 0;
1071                         foreach_ir_nodeset(&live, irn, iter) {
1072                                 bli->live_end[j]    = irn;
1073                                 bli->live_end_nr[j] = curr_nr + j;
1074                                 ++j;
1075                         }
1076                         bli->n_live_end = j;
1077                 }
1078
1079                 if(!env->dumb) {
1080                         for(j = 0; j < insn->use_start; ++j) {
1081                                 ir_node *op   = insn->ops[j].carrier;
1082                                 bitset_t *adm = insn->ops[j].regs;
1083                                 int k;
1084                                 size_t nr;
1085
1086                                 if(!insn->ops[j].has_constraints)
1087                                         continue;
1088
1089                                 nr = 0;
1090                                 foreach_ir_nodeset(&live, irn, iter) {
1091                                         if(irn == op) {
1092                                                 break;
1093                                         }
1094                                         ++nr;
1095                                 }
1096
1097                                 assert(nr < ir_nodeset_size(&live));
1098
1099                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1100                                         int mapped_col = env->color_map[k];
1101                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1102                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1103                                 }
1104                         }
1105                 }
1106
1107                 /* dump the clique and update the stuff. */
1108                 curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1109
1110                 /* remove all defs. */
1111                 for(j = 0; j < insn->use_start; ++j)
1112                         ir_nodeset_remove(&live, insn->ops[j].carrier);
1113
1114                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1115                         bli->phi[bli->n_phi]    = insn->irn;
1116                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1117                         bli->n_phi++;
1118                 }
1119
1120                 /* add all uses */
1121                 else
1122                         for(j = insn->use_start; j < insn->n_ops; ++j)
1123                                 ir_nodeset_insert(&live, insn->ops[j].carrier);
1124         }
1125
1126         /* print the start clique. */
1127         curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1128
1129         i = 0;
1130         foreach_ir_nodeset(&live, irn, iter) {
1131                 bli->live_in[i]    = irn;
1132                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1133                 ++i;
1134         }
1135         bli->n_live_in = i;
1136
1137         ir_nodeset_destroy(&live);
1138         free(insns);
1139         obstack_free(obst, base);
1140         env->curr_nr = curr_nr;
1141 }
1142
1143 static void appel_inter_block_aff(ir_node *bl, void *data)
1144 {
1145         appel_clique_walker_t *env = data;
1146         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1147
1148         int i, j, n;
1149
1150         for(i = 0; i < bli->n_live_in; ++i) {
1151                 ir_node *irn = bli->live_in[i];
1152
1153                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1154                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1155
1156                         int nr = appel_get_live_end_nr(env, pred, irn);
1157                         assert(nr >= 0);
1158                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1159                 }
1160         }
1161
1162         for(i = 0; i < bli->n_phi; ++i) {
1163                 ir_node *irn = bli->phi[i];
1164
1165                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1166                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1167                         ir_node *op = get_irn_n(irn, j);
1168
1169                         int nr = appel_get_live_end_nr(env, pred, op);
1170                         assert(nr >= 0);
1171                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1172                 }
1173         }
1174
1175 }
1176
1177 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1178 {
1179         int i;
1180         int n_colors;
1181         appel_clique_walker_t env;
1182         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1183         be_lv_t *lv = co->cenv->birg->lv;
1184
1185         be_liveness_recompute(lv);
1186         obstack_init(&env.obst);
1187         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1188         env.curr_nr = co->cls->n_regs;
1189         env.co = co;
1190         env.f = f;
1191
1192         bitset_copy(adm, co->cenv->ignore_colors);
1193         bitset_flip_all(adm);
1194
1195         /* Make color map. */
1196         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1197         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1198                 const arch_register_t *reg = &co->cls->regs[i];
1199                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1200         }
1201
1202         env.dumb = 1;
1203         env.curr_nr = n_colors;
1204         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1205         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1206
1207         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1208
1209         /* make the first k nodes interfere */
1210         for(i = 0; i < n_colors; ++i) {
1211                 int j;
1212                 for(j = i + 1; j < n_colors; ++j)
1213                         fprintf(f, "%d %d -1 ", i, j);
1214                 fprintf(f, "\n");
1215         }
1216
1217         env.dumb = 0;
1218         env.curr_nr = n_colors;
1219         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1220         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1221         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1222         obstack_free(&env.obst, NULL);
1223 }
1224
1225 /*
1226          ___ _____ ____   ____   ___ _____   ____                        _
1227         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1228          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1229          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1230         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1231                                                                   |_|            |___/
1232 */
1233
1234 static const char *get_dot_color_name(size_t col)
1235 {
1236         static const char *names[] = {
1237                 "blue",
1238                 "red",
1239                 "green",
1240                 "yellow",
1241                 "cyan",
1242                 "magenta",
1243                 "orange",
1244                 "chocolate",
1245                 "beige",
1246                 "navy",
1247                 "darkgreen",
1248                 "darkred",
1249                 "lightPink",
1250                 "chartreuse",
1251                 "lightskyblue",
1252                 "linen",
1253                 "pink",
1254                 "lightslateblue",
1255                 "mintcream",
1256                 "red",
1257                 "darkolivegreen",
1258                 "mediumblue",
1259                 "mistyrose",
1260                 "salmon",
1261                 "darkseagreen",
1262                 "mediumslateblue"
1263                 "moccasin",
1264                 "tomato",
1265                 "forestgreen",
1266                 "darkturquoise",
1267                 "palevioletred"
1268         };
1269
1270         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1271 }
1272
1273 typedef struct _co_ifg_dump_t {
1274         const copy_opt_t *co;
1275         unsigned flags;
1276 } co_ifg_dump_t;
1277
1278 static void ifg_dump_graph_attr(FILE *f, void *self)
1279 {
1280         (void) self;
1281         fprintf(f, "overlap=scale");
1282 }
1283
1284 static int ifg_is_dump_node(void *self, ir_node *irn)
1285 {
1286         co_ifg_dump_t *cod = self;
1287         return !arch_irn_is(cod->co->aenv, irn, ignore);
1288 }
1289
1290 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1291 {
1292         co_ifg_dump_t *env         = self;
1293         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1294         const arch_register_req_t *req;
1295         int limited;
1296
1297         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1298         limited = arch_register_req_is(req, limited);
1299
1300         if(env->flags & CO_IFG_DUMP_LABELS) {
1301                 ir_fprintf(f, "label=\"%+F", irn);
1302
1303 #if 0
1304                 // TODO fix this...
1305                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1306                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1307                         req.limited(req.limited_env, bs);
1308                         ir_fprintf(f, "\\n%B", bs);
1309                 }
1310 #endif
1311                 ir_fprintf(f, "\" ");
1312         } else {
1313                 fprintf(f, "label=\"\" shape=point " );
1314         }
1315
1316         if(env->flags & CO_IFG_DUMP_SHAPE)
1317                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1318
1319         if(env->flags & CO_IFG_DUMP_COLORS)
1320                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1321 }
1322
1323 static void ifg_dump_at_end(FILE *file, void *self)
1324 {
1325         co_ifg_dump_t *env = self;
1326         affinity_node_t *a;
1327
1328         co_gs_foreach_aff_node(env->co, a) {
1329                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1330                 unsigned aidx = get_irn_idx(a->irn);
1331                 neighb_t *n;
1332
1333                 co_gs_foreach_neighb(a, n) {
1334                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1335                         unsigned nidx = get_irn_idx(n->irn);
1336
1337                         if(aidx < nidx) {
1338                                 const char *color = nr == ar ? "blue" : "red";
1339                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1340                                 if(env->flags & CO_IFG_DUMP_LABELS)
1341                                         fprintf(file, "label=\"%d\" ", n->costs);
1342                                 if(env->flags & CO_IFG_DUMP_COLORS)
1343                                         fprintf(file, "color=%s ", color);
1344                                 else
1345                                         fprintf(file, "style=dotted");
1346                                 fprintf(file, "];\n");
1347                         }
1348                 }
1349         }
1350 }
1351
1352
1353 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1354         ifg_is_dump_node,
1355         ifg_dump_graph_attr,
1356         ifg_dump_node_attr,
1357         NULL,
1358         NULL,
1359         ifg_dump_at_end
1360 };
1361
1362
1363
1364 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1365 {
1366         co_ifg_dump_t cod;
1367
1368         cod.co    = co;
1369         cod.flags = flags;
1370         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1371 }
1372
1373
1374 void co_solve_park_moon(copy_opt_t *opt)
1375 {
1376         (void) opt;
1377 }
1378
1379 static int void_algo(copy_opt_t *co)
1380 {
1381         (void) co;
1382         return 0;
1383 }
1384
1385 /*
1386                 _    _                  _ _   _
1387            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1388           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1389          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1390         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1391                            |___/
1392 */
1393
1394 typedef struct {
1395         co_algo_t  *algo;
1396         const char *name;
1397         int        can_improve_existing;
1398 } co_algo_info_t;
1399
1400 static co_algo_info_t algos[] = {
1401         { void_algo,               "none",  0 },
1402         { co_solve_heuristic,      "heur1", 0 },
1403         { co_solve_heuristic_new,  "heur2", 0 },
1404         { co_solve_heuristic_java, "heur3", 0 },
1405         { co_solve_heuristic_mst,  "heur4", 0 },
1406 #ifdef WITH_ILP
1407         { co_solve_ilp2,           "ilp",   1 },
1408 #endif
1409         { NULL,                    "",      0 }
1410 };
1411
1412 /*
1413     __  __       _         ____       _
1414    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1415    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1416    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1417    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1418
1419 */
1420
1421 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1422 {
1423         FILE *result;
1424         char buf[1024];
1425
1426         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1427         result = fopen(buf, "wt");
1428         if(result == NULL) {
1429                 panic("Couldn't open '%s' for writing.", buf);
1430         }
1431
1432         return result;
1433 }
1434
1435 void co_driver(be_chordal_env_t *cenv)
1436 {
1437         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1438         co_complete_stats_t before, after;
1439         copy_opt_t          *co;
1440         co_algo_t           *algo_func;
1441         int                 was_optimal = 0;
1442
1443         if (algo >= CO_ALGO_LAST)
1444                 return;
1445
1446         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1447
1448         co = new_copy_opt(cenv, cost_func);
1449         co_build_ou_structure(co);
1450         co_build_graph_structure(co);
1451
1452         co_complete_stats(co, &before);
1453
1454         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1455         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1456         be_stat_ev_ull("co_max_costs",    before.max_costs);
1457         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1458         be_stat_ev_ull("co_aff_int",      before.aff_int);
1459
1460         be_stat_ev_ull("co_init_costs",   before.costs);
1461         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1462
1463         /* Dump the interference graph in Appel's format. */
1464         if (dump_flags & DUMP_APPEL) {
1465                 FILE *f = my_open(cenv, "", ".apl");
1466                 co_dump_appel_graph(co, f);
1467                 fclose(f);
1468         }
1469
1470         if (dump_flags & DUMP_BEFORE) {
1471                 FILE *f = my_open(cenv, "", "-before.dot");
1472                 co_dump_ifg_dot(co, f, style_flags);
1473                 fclose(f);
1474         }
1475
1476         /* if the algo can improve results, provide an initial solution with heur3 */
1477         if (improve && algos[algo].can_improve_existing) {
1478                 co_complete_stats_t stats;
1479
1480                 /* produce a heuristic solution */
1481 #ifdef WITH_JVM
1482                 co_solve_heuristic_java(co);
1483 #else
1484                 co_solve_heuristic(co);
1485 #endif /* WITH_JVM */
1486
1487                 /* do the stats and provide the current costs */
1488                 co_complete_stats(co, &stats);
1489                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1490         }
1491
1492 #ifdef WITH_JVM
1493         /* start the JVM here so that it does not tamper the timing. */
1494         if (algo == CO_ALGO_HEUR3)
1495                 be_java_coal_start_jvm();
1496 #endif /* WITH_JVM */
1497
1498         algo_func = algos[algo].algo;
1499
1500         /* perform actual copy minimization */
1501         lc_timer_reset_and_start(timer);
1502         was_optimal = algo_func(co);
1503         lc_timer_stop(timer);
1504
1505         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1506         be_stat_ev_ull("co_optimal", was_optimal);
1507
1508         if (dump_flags & DUMP_AFTER) {
1509                 FILE *f = my_open(cenv, "", "-after.dot");
1510                 co_dump_ifg_dot(co, f, style_flags);
1511                 fclose(f);
1512         }
1513
1514         co_complete_stats(co, &after);
1515
1516         if (do_stats) {
1517                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1518                 ulong64 evitable          = after.costs     - after.inevit_costs;
1519
1520                 ir_printf("%30F ", cenv->irg);
1521                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1522
1523                 if(optimizable_costs > 0)
1524                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1525                 else
1526                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1527         }
1528
1529         be_stat_ev_ull("co_after_costs", after.costs);
1530         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1531
1532         co_free_graph_structure(co);
1533         co_free_ou_structure(co);
1534         free_copy_opt(co);
1535 }