8df101d075821254781ac1040426ef914c24b828
[libfirm] / ir / be / becopyopt.c
1 /**
2  * Author:      Daniel Grund
3  * Date:                12.04.2005
4  * Copyright:   (c) Universitaet Karlsruhe
5  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
6  */
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #ifdef HAVE_ALLOCA_H
11 #include <alloca.h>
12 #endif
13 #ifdef HAVE_MALLOC_H
14 #include <malloc.h>
15 #endif
16
17 #include "execfreq.h"
18 #include "xmalloc.h"
19 #include "debug.h"
20 #include "pmap.h"
21 #include "irgraph.h"
22 #include "irgwalk.h"
23 #include "irprog.h"
24 #include "irloop_t.h"
25 #include "iredges_t.h"
26 #include "phiclass.h"
27 #include "irbitset.h"
28 #include "irphase_t.h"
29 #include "irprintf_t.h"
30
31
32 #include "bearch.h"
33 #include "benode_t.h"
34 #include "beutil.h"
35 #include "beifg_t.h"
36 #include "becopyopt_t.h"
37 #include "becopystat.h"
38 #include "belive_t.h"
39 #include "beinsn_t.h"
40 #include "besched_t.h"
41 #include "benodesets.h"
42 #include "bejavacoal.h"
43 #include "bestatevent.h"
44
45 #ifdef WITH_LIBCORE
46 #include <libcore/lc_timing.h>
47 #include <libcore/lc_opts.h>
48 #endif /* WITH_LIBCORE */
49
50 #define DUMP_BEFORE 1
51 #define DUMP_AFTER  2
52 #define DUMP_APPEL  4
53 #define DUMP_ALL    2 * DUMP_APPEL - 1
54
55 #define COST_FUNC_FREQ     1
56 #define COST_FUNC_LOOP     2
57 #define COST_FUNC_ALL_ONE  3
58
59 static int dump_flags         = 0;
60 static int style_flags        = 0;
61 static int do_stats           = 0;
62 static cost_fct_t cost_func   = co_get_costs_exec_freq;
63 static int algo               = CO_ALGO_HEUR2;
64 static int improve            = 1;
65
66 #ifdef WITH_LIBCORE
67 static const lc_opt_enum_mask_items_t dump_items[] = {
68         { "before",  DUMP_BEFORE },
69         { "after",   DUMP_AFTER  },
70         { "appel",   DUMP_APPEL  },
71         { "all",     DUMP_ALL    },
72         { NULL,      0 }
73 };
74
75 static const lc_opt_enum_mask_items_t style_items[] = {
76         { "color",   CO_IFG_DUMP_COLORS },
77         { "labels",  CO_IFG_DUMP_LABELS },
78         { "constr",  CO_IFG_DUMP_CONSTR },
79         { "shape",   CO_IFG_DUMP_SHAPE  },
80         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
81         { NULL,      0 }
82 };
83
84 static const lc_opt_enum_mask_items_t algo_items[] = {
85         { "none",   CO_ALGO_NONE  },
86         { "heur",   CO_ALGO_HEUR  },
87         { "heur2",  CO_ALGO_HEUR2 },
88         { "heur3",  CO_ALGO_HEUR3 },
89         { "ilp",    CO_ALGO_ILP   },
90         { NULL,     0 }
91 };
92
93 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
94         { "freq",   co_get_costs_exec_freq },
95         { "loop",   co_get_costs_loop_depth },
96         { "one",    co_get_costs_all_one },
97         { NULL,     0 }
98 };
99
100 static lc_opt_enum_mask_var_t dump_var = {
101         &dump_flags, dump_items
102 };
103
104 static lc_opt_enum_mask_var_t style_var = {
105         &style_flags, style_items
106 };
107
108 static lc_opt_enum_mask_var_t algo_var = {
109         &algo, algo_items
110 };
111
112 static lc_opt_enum_func_ptr_var_t cost_func_var = {
113         &cost_func, cost_func_items
114 };
115
116 static const lc_opt_table_entry_t options[] = {
117         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo (heur, heur2, heur3, ilp)", &algo_var),
118         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function (freq, loop, one)",                &cost_func_var),
119         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
120         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
121         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
122         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
123         { NULL }
124 };
125
126 /* Insert additional options registration functions here. */
127 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
128 extern void be_co2_register_options(lc_opt_entry_t *grp);
129 extern void be_co3_register_options(lc_opt_entry_t *grp);
130
131 void co_register_options(lc_opt_entry_t *grp)
132 {
133         lc_opt_entry_t *co_grp = lc_opt_get_grp(grp, "co");
134         lc_opt_add_table(co_grp, options);
135
136         be_co2_register_options(co_grp);
137         be_co3_register_options(co_grp);
138 #ifdef WITH_ILP
139         be_co_ilp_register_options(co_grp);
140 #endif
141 }
142 #endif
143
144
145 #undef QUICK_AND_DIRTY_HACK
146
147 /******************************************************************************
148     _____                           _
149    / ____|                         | |
150   | |  __  ___ _ __   ___ _ __ __ _| |
151   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
152   | |__| |  __/ | | |  __/ | | (_| | |
153    \_____|\___|_| |_|\___|_|  \__,_|_|
154
155  ******************************************************************************/
156
157 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
158
159
160 void be_copy_opt_init(void) {
161 }
162
163 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
164 {
165         const char *s1, *s2, *s3;
166         int len;
167         copy_opt_t *co;
168
169         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
170
171         co = xcalloc(1, sizeof(*co));
172         co->cenv      = chordal_env;
173         co->aenv      = chordal_env->birg->main_env->arch_env;
174         co->irg       = chordal_env->irg;
175         co->cls       = chordal_env->cls;
176         co->get_costs = get_costs;
177
178         s1 = get_irp_prog_name();
179         s2 = get_entity_name(get_irg_entity(co->irg));
180         s3 = chordal_env->cls->name;
181         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
182         co->name = xmalloc(len);
183         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
184
185         return co;
186 }
187
188 void free_copy_opt(copy_opt_t *co) {
189         xfree(co->name);
190         free(co);
191 }
192
193 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
194         arch_register_req_t req;
195         const arch_register_t *reg;
196
197         if (arch_irn_is(co->aenv, irn, ignore))
198                 return 0;
199
200         reg = arch_get_irn_register(co->aenv, irn);
201         if (arch_register_type_is(reg, ignore))
202                 return 0;
203
204         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(co->aenv, irn, &req))
205                 return 1;
206
207         return 0;
208 }
209
210 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
211         const ir_edge_t *edge;
212         const arch_register_t *reg;
213
214         assert(0 && "Is buggy and obsolete. Do not use");
215
216         if (arch_irn_is(co->aenv, irn, ignore))
217                 return 0;
218
219         reg = arch_get_irn_register(co->aenv, irn);
220         if (arch_register_type_is(reg, ignore))
221                 return 0;
222
223         foreach_out_edge(irn, edge) {
224                 ir_node *n = edge->src;
225
226                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
227                         arch_register_req_t req;
228                         arch_get_register_req(co->aenv, &req, n, -1);
229
230                         if(is_Reg_Phi(n) ||
231                            is_Perm(co->aenv, n) ||
232                            (arch_register_req_is(&req, should_be_same) && req.other_same == irn)
233                           )
234                                 return 1;
235                 }
236         }
237
238         return 0;
239 }
240
241 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
242         int cost = 0;
243         ir_loop *loop;
244         ir_node *root_block = get_nodes_block(root);
245
246         if (is_Phi(root)) {
247                 /* for phis the copies are placed in the corresponding pred-block */
248                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
249         } else {
250                 /* a perm places the copy in the same block as it resides */
251                 loop = get_irn_loop(root_block);
252         }
253         if (loop) {
254                 int d = get_loop_depth(loop);
255                 cost = d*d;
256         }
257         return 1+cost;
258 }
259
260 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
261         int res;
262         ir_node *root_bl = get_nodes_block(root);
263         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
264         res = get_block_execfreq_ulong(co->cenv->exec_freq, copy_bl);
265
266         /* don't allow values smaller than one. */
267         return res < 1 ? 1 : res;
268 }
269
270
271 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
272         return 1;
273 }
274
275 /******************************************************************************
276    ____        _   _    _       _ _          _____ _
277   / __ \      | | | |  | |     (_) |        / ____| |
278  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
279  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
280  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
281   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
282         | |                                                            __/ |
283         |_|                                                           |___/
284  ******************************************************************************/
285
286 /**
287  * Determines a maximum weighted independent set with respect to
288  * the interference and conflict edges of all nodes in a qnode.
289  */
290 static int ou_max_ind_set_costs(unit_t *ou) {
291         be_chordal_env_t *chordal_env = ou->co->cenv;
292         ir_node **safe, **unsafe;
293         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
294         bitset_t *curr;
295         int max, pos, curr_weight, best_weight = 0;
296
297         /* assign the nodes into two groups.
298          * safe: node has no interference, hence it is in every max stable set.
299          * unsafe: node has an interference
300          */
301         safe = alloca((ou->node_count-1) * sizeof(*safe));
302         safe_costs = 0;
303         safe_count = 0;
304         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
305         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
306         unsafe_count = 0;
307         for(i=1; i<ou->node_count; ++i) {
308                 int is_safe = 1;
309                 for(o=1; o<ou->node_count; ++o) {
310                         if (i==o)
311                                 continue;
312                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
313                                 unsafe_costs[unsafe_count] = ou->costs[i];
314                                 unsafe[unsafe_count] = ou->nodes[i];
315                                 ++unsafe_count;
316                                 is_safe = 0;
317                                 break;
318                         }
319                 }
320                 if (is_safe) {
321                         safe_costs += ou->costs[i];
322                         safe[safe_count++] = ou->nodes[i];
323                 }
324         }
325
326
327         /* now compute the best set out of the unsafe nodes*/
328         if (unsafe_count > MIS_HEUR_TRIGGER) {
329                 bitset_t *best = bitset_alloca(unsafe_count);
330                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
331                 for (i=0; i<unsafe_count; ++i) {
332                         bitset_set(best, i);
333                         /* check if it is a stable set */
334                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
335                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
336                                         bitset_clear(best, i); /* clear the bit and try next one */
337                                         break;
338                                 }
339                 }
340                 /* compute the weight */
341                 bitset_foreach(best, pos)
342                         best_weight += unsafe_costs[pos];
343         } else {
344                 /* Exact Algorithm: Brute force */
345                 curr = bitset_alloca(unsafe_count);
346                 bitset_set_all(curr);
347                 while ((max = bitset_popcnt(curr)) != 0) {
348                         /* check if curr is a stable set */
349                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
350                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
351                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
352                                                         goto no_stable_set;
353
354                         /* if we arrive here, we have a stable set */
355                         /* compute the weigth of the stable set*/
356                         curr_weight = 0;
357                         bitset_foreach(curr, pos)
358                                 curr_weight += unsafe_costs[pos];
359
360                         /* any better ? */
361                         if (curr_weight > best_weight) {
362                                 best_weight = curr_weight;
363                         }
364
365         no_stable_set:
366                         bitset_minus1(curr);
367                 }
368         }
369
370         return safe_costs+best_weight;
371 }
372
373 static void co_collect_units(ir_node *irn, void *env) {
374         copy_opt_t *co = env;
375         unit_t *unit;
376         arch_register_req_t req;
377
378         if (!is_curr_reg_class(co, irn))
379                 return;
380         if (!co_is_optimizable_root(co, irn))
381                 return;
382
383         /* Init a new unit */
384         unit = xcalloc(1, sizeof(*unit));
385         unit->co = co;
386         unit->node_count = 1;
387         INIT_LIST_HEAD(&unit->queue);
388
389         /* Phi with some/all of its arguments */
390         if (is_Reg_Phi(irn)) {
391                 int i, arity;
392
393                 /* init */
394                 arity = get_irn_arity(irn);
395                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
396                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
397                 unit->nodes[0] = irn;
398
399                 /* fill */
400                 for (i=0; i<arity; ++i) {
401                         int o, arg_pos;
402                         ir_node *arg = get_irn_n(irn, i);
403
404                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
405                         if (arg == irn)
406                                 continue;
407                         if (nodes_interfere(co->cenv, irn, arg)) {
408                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
409                                 continue;
410                         }
411
412                         /* Else insert the argument of the phi to the members of this ou */
413                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
414
415                         /* Check if arg has occurred at a prior position in the arg/list */
416                         arg_pos = 0;
417                         for (o=0; o<unit->node_count; ++o)
418                                 if (unit->nodes[o] == arg) {
419                                         arg_pos = o;
420                                         break;
421                                 }
422
423                         if (!arg_pos) { /* a new argument */
424                                 /* insert node, set costs */
425                                 unit->nodes[unit->node_count] = arg;
426                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
427                                 unit->node_count++;
428                         } else { /* arg has occured before in same phi */
429                                 /* increase costs for existing arg */
430                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
431                         }
432                 }
433                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
434                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
435         } else
436
437         /* Proj of a perm with corresponding arg */
438         if (is_Perm_Proj(co->aenv, irn)) {
439                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
440                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
441                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
442                 unit->node_count = 2;
443                 unit->nodes[0] = irn;
444                 unit->nodes[1] = get_Perm_src(irn);
445                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
446         } else
447
448         /* Src == Tgt of a 2-addr-code instruction */
449         if (is_2addr_code(co->aenv, irn, &req)) {
450                 ir_node *other = req.other_same;
451                 if (!nodes_interfere(co->cenv, irn, other)) {
452                         unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
453                         unit->costs = xmalloc(2 * sizeof(*unit->costs));
454                         unit->node_count = 2;
455                         unit->nodes[0] = irn;
456                         unit->nodes[1] = other;
457                         unit->costs[1] = co->get_costs(co, irn, other, -1);
458                 }
459         } else
460                 assert(0 && "This is not an optimizable node!");
461
462         /* Insert the new unit at a position according to its costs */
463         if (unit->node_count > 1) {
464                 int i;
465                 struct list_head *tmp;
466
467                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
468                 for(i=1; i<unit->node_count; ++i) {
469                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
470                         unit->all_nodes_costs += unit->costs[i];
471                 }
472
473                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
474                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
475                 /* Insert the new ou according to its sort_key */
476                 tmp = &co->units;
477                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
478                         tmp = tmp->next;
479                 list_add(&unit->units, tmp);
480         } else {
481                 free(unit);
482         }
483 }
484
485 #ifdef QUICK_AND_DIRTY_HACK
486
487 static int compare_ous(const void *k1, const void *k2) {
488         const unit_t *u1 = *((const unit_t **) k1);
489         const unit_t *u2 = *((const unit_t **) k2);
490         int i, o, u1_has_constr, u2_has_constr;
491         arch_register_req_t req;
492         const arch_env_t *aenv = u1->co->aenv;
493
494         /* Units with constraints come first */
495         u1_has_constr = 0;
496         for (i=0; i<u1->node_count; ++i) {
497                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
498                 if (arch_register_req_is(&req, limited)) {
499                         u1_has_constr = 1;
500                         break;
501                 }
502         }
503
504         u2_has_constr = 0;
505         for (i=0; i<u2->node_count; ++i) {
506                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
507                 if (arch_register_req_is(&req, limited)) {
508                         u2_has_constr = 1;
509                         break;
510                 }
511         }
512
513         if (u1_has_constr != u2_has_constr)
514                 return u2_has_constr - u1_has_constr;
515
516         /* Now check, whether the two units are connected */
517 #if 0
518         for (i=0; i<u1->node_count; ++i)
519                 for (o=0; o<u2->node_count; ++o)
520                         if (u1->nodes[i] == u2->nodes[o])
521                                 return 0;
522 #endif
523
524         /* After all, the sort key decides. Greater keys come first. */
525         return u2->sort_key - u1->sort_key;
526
527 }
528
529 /**
530  * Sort the ou's according to constraints and their sort_key
531  */
532 static void co_sort_units(copy_opt_t *co) {
533         int i, count = 0, costs;
534         unit_t *ou, **ous;
535
536         /* get the number of ous, remove them form the list and fill the array */
537         list_for_each_entry(unit_t, ou, &co->units, units)
538                 count++;
539         ous = alloca(count * sizeof(*ous));
540
541         costs = co_get_max_copy_costs(co);
542
543         i = 0;
544         list_for_each_entry(unit_t, ou, &co->units, units)
545                 ous[i++] = ou;
546
547         INIT_LIST_HEAD(&co->units);
548
549         assert(count == i && list_empty(&co->units));
550
551         for (i=0; i<count; ++i)
552                 ir_printf("%+F\n", ous[i]->nodes[0]);
553
554         qsort(ous, count, sizeof(*ous), compare_ous);
555
556         ir_printf("\n\n");
557         for (i=0; i<count; ++i)
558                 ir_printf("%+F\n", ous[i]->nodes[0]);
559
560         /* reinsert into list in correct order */
561         for (i=0; i<count; ++i)
562                 list_add_tail(&ous[i]->units, &co->units);
563
564         assert(costs == co_get_max_copy_costs(co));
565 }
566 #endif
567
568 void co_build_ou_structure(copy_opt_t *co) {
569         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
570         INIT_LIST_HEAD(&co->units);
571         irg_walk_graph(co->irg, co_collect_units, NULL, co);
572 #ifdef QUICK_AND_DIRTY_HACK
573         co_sort_units(co);
574 #endif
575 }
576
577 void co_free_ou_structure(copy_opt_t *co) {
578         unit_t *curr, *tmp;
579         ASSERT_OU_AVAIL(co);
580         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
581                 xfree(curr->nodes);
582                 xfree(curr->costs);
583                 xfree(curr);
584         }
585         co->units.next = NULL;
586 }
587
588 /* co_solve_heuristic() is implemented in becopyheur.c */
589
590 int co_get_max_copy_costs(const copy_opt_t *co) {
591         int i, res = 0;
592         unit_t *curr;
593
594         ASSERT_OU_AVAIL(co);
595
596         list_for_each_entry(unit_t, curr, &co->units, units) {
597                 res += curr->inevitable_costs;
598                 for (i=1; i<curr->node_count; ++i)
599                         res += curr->costs[i];
600         }
601         return res;
602 }
603
604 int co_get_inevit_copy_costs(const copy_opt_t *co) {
605         int res = 0;
606         unit_t *curr;
607
608         ASSERT_OU_AVAIL(co);
609
610         list_for_each_entry(unit_t, curr, &co->units, units)
611                 res += curr->inevitable_costs;
612         return res;
613 }
614
615 int co_get_copy_costs(const copy_opt_t *co) {
616         int i, res = 0;
617         unit_t *curr;
618
619         ASSERT_OU_AVAIL(co);
620
621         list_for_each_entry(unit_t, curr, &co->units, units) {
622                 int root_col = get_irn_col(co, curr->nodes[0]);
623                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
624                 res += curr->inevitable_costs;
625                 for (i=1; i<curr->node_count; ++i) {
626                         int arg_col = get_irn_col(co, curr->nodes[i]);
627                         if (root_col != arg_col) {
628                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
629                                 res += curr->costs[i];
630                         }
631                 }
632         }
633         return res;
634 }
635
636 int co_get_lower_bound(const copy_opt_t *co) {
637         int res = 0;
638         unit_t *curr;
639
640         ASSERT_OU_AVAIL(co);
641
642         list_for_each_entry(unit_t, curr, &co->units, units)
643                 res += curr->inevitable_costs + curr->min_nodes_costs;
644         return res;
645 }
646
647 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
648 {
649         bitset_t *seen = bitset_irg_malloc(co->irg);
650         affinity_node_t *an;
651
652         memset(stat, 0, sizeof(stat[0]));
653
654         /* count affinity edges. */
655         co_gs_foreach_aff_node(co, an) {
656                 neighb_t *neigh;
657                 stat->aff_nodes += 1;
658                 bitset_add_irn(seen, an->irn);
659                 co_gs_foreach_neighb(an, neigh) {
660                         if(!bitset_contains_irn(seen, neigh->irn)) {
661                                 stat->aff_edges += 1;
662                                 stat->max_costs += neigh->costs;
663
664                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
665                                         stat->costs += neigh->costs;
666                                         stat->unsatisfied_edges += 1;
667                                 }
668
669                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
670                                         stat->aff_int += 1;
671                                         stat->inevit_costs += neigh->costs;
672                                 }
673
674                         }
675                 }
676         }
677
678         bitset_free(seen);
679 }
680
681 /******************************************************************************
682    _____                 _        _____ _
683   / ____|               | |      / ____| |
684  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
685  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
686  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
687   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
688                   | |                                       __/ |
689                   |_|                                      |___/
690  ******************************************************************************/
691
692 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
693         const affinity_node_t *n1 = k1;
694         const affinity_node_t *n2 = k2;
695
696         return (n1->irn != n2->irn);
697 }
698
699 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
700         affinity_node_t new_node, *node;
701         neighb_t new_nbr, *nbr;
702         int allocnew;
703
704         new_node.irn        = n1;
705         new_node.degree     = 0;
706         new_node.neighbours = NULL;
707         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
708
709         allocnew = 1;
710         for (nbr = node->neighbours; nbr; nbr = nbr->next)
711                 if (nbr->irn == n2) {
712                         allocnew = 0;
713                         break;
714                 }
715
716         /* if we did not find n2 in n1's neighbourhood insert it */
717         if (allocnew) {
718                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
719                 nbr = obstack_finish(&co->obst);
720                 nbr->irn   = n2;
721                 nbr->costs = 0;
722                 nbr->next  = node->neighbours;
723                 node->neighbours = nbr;
724                 node->degree++;
725         }
726
727         /* now nbr points to n1's neighbour-entry of n2 */
728         nbr->costs += costs;
729 }
730
731 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
732         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
733                 add_edge(co, n1, n2, costs);
734                 add_edge(co, n2, n1, costs);
735         }
736 }
737
738 static void build_graph_walker(ir_node *irn, void *env) {
739         copy_opt_t *co = env;
740         int pos, max;
741         arch_register_req_t req;
742         const arch_register_t *reg;
743
744         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
745                 return;
746
747         reg = arch_get_irn_register(co->aenv, irn);
748         if (arch_register_type_is(reg, ignore))
749                 return;
750
751         /* Phis */
752         if (is_Reg_Phi(irn))
753                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
754                         ir_node *arg = get_irn_n(irn, pos);
755                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
756                 }
757
758         /* Perms */
759         else if (is_Perm_Proj(co->aenv, irn)) {
760                 ir_node *arg = get_Perm_src(irn);
761                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
762         }
763
764         /* 2-address code */
765         else if (is_2addr_code(co->aenv, irn, &req))
766                 add_edges(co, irn, req.other_same, co->get_costs(co, irn, req.other_same, 0));
767 }
768
769 void co_build_graph_structure(copy_opt_t *co) {
770         obstack_init(&co->obst);
771         co->nodes = new_set(compare_affinity_node_t, 32);
772
773         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
774 }
775
776 void co_free_graph_structure(copy_opt_t *co) {
777         ASSERT_GS_AVAIL(co);
778
779         del_set(co->nodes);
780         obstack_free(&co->obst, NULL);
781         co->nodes = NULL;
782 }
783
784 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
785
786 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
787         affinity_node_t new_node, *n;
788
789         ASSERT_GS_AVAIL(co);
790
791         new_node.irn = irn;
792         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
793         if (n) {
794                 return (n->degree > 0);
795         } else
796                 return 0;
797 }
798
799 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
800 {
801         be_ifg_t *ifg  = co->cenv->ifg;
802         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
803         bitset_t *adm  = bitset_alloca(co->cls->n_regs);
804
805         ir_node *irn;
806         void *it, *nit;
807         int i, n, n_regs;
808
809         n_regs = 0;
810         for(i = 0; i < co->cls->n_regs; ++i) {
811                 const arch_register_t *reg = &co->cls->regs[i];
812                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
813         }
814
815         /*
816          * n contains the first node number.
817          * the values below n are the pre-colored register nodes
818          */
819
820         it  = be_ifg_nodes_iter_alloca(ifg);
821         nit = be_ifg_neighbours_iter_alloca(ifg);
822
823         n = n_regs;
824         be_ifg_foreach_node(ifg, it, irn) {
825                 if(!arch_irn_is(co->aenv, irn, ignore))
826                         set_irn_link(irn, INT_TO_PTR(n++));
827         }
828
829         fprintf(f, "%d %d\n", n, n_regs);
830
831         be_ifg_foreach_node(ifg, it, irn) {
832                 if(!arch_irn_is(co->aenv, irn, ignore)) {
833                         int idx            = PTR_TO_INT(get_irn_link(irn));
834                         affinity_node_t *a = get_affinity_info(co, irn);
835
836                         arch_register_req_t req;
837                         ir_node *adj;
838
839                         arch_get_register_req(co->aenv, &req, irn, BE_OUT_POS(0));
840                         if(arch_register_req_is(&req, limited)) {
841                                 bitset_clear_all(adm);
842                                 req.limited(req.limited_env, adm);
843                                 for(i = 0; i < co->cls->n_regs; ++i)
844                                         if(!bitset_is_set(adm, i) && color_map[i] >= 0)
845                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
846
847                         }
848
849
850                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
851                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
852                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
853                                         if(idx < adj_idx)
854                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
855                                 }
856                         }
857
858                         if(a) {
859                                 neighb_t *n;
860
861                                 co_gs_foreach_neighb(a, n) {
862                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
863                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
864                                                 if(idx < n_idx)
865                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
866                                         }
867                                 }
868                         }
869                 }
870         }
871 }
872
873 typedef struct _appel_clique_walker_t {
874         phase_t ph;
875         const copy_opt_t *co;
876         int curr_nr;
877         int node_count;
878         FILE *f;
879         int dumb;
880         int *color_map;
881         struct obstack obst;
882 } appel_clique_walker_t;
883
884 typedef struct _appel_block_info_t {
885         int *live_end_nr;
886         int *live_in_nr;
887         int *phi_nr;
888         ir_node **live_end;
889         ir_node **live_in;
890         ir_node **phi;
891         int n_live_end;
892         int n_live_in;
893         int n_phi;
894 } appel_block_info_t;
895
896 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
897 {
898 #if 0
899         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
900         int res = (int) freq;
901         return res == 0 ? 1 : res;
902 #else
903         ir_loop *loop = get_irn_loop(bl);
904         if(loop) {
905                 int d = get_loop_depth(loop);
906                 return 1 + d * d;
907         }
908         return 1;
909 #endif
910 }
911
912 static void *appel_clique_walker_irn_init(phase_t *phase, ir_node *irn, void *old)
913 {
914         appel_block_info_t *res = NULL;
915
916         if(is_Block(irn)) {
917                 appel_clique_walker_t *d = (void *) phase;
918                 res = phase_alloc(phase, sizeof(res[0]));
919                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
920                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
921                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
922                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
923                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
924                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
925         }
926
927         return res;
928 }
929
930 typedef struct _insn_list_t {
931         be_insn_t *insn;
932         struct list_head list;
933 } insn_list_t;
934
935 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
936 {
937         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
938         int i;
939
940         for(i = 0; i < bli->n_live_end; ++i)
941                 if(bli->live_end[i] == irn)
942                         return bli->live_end_nr[i];
943
944         return -1;
945 }
946
947 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
948 {
949         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
950         ir_node *irn;
951         int n_live;
952         int j;
953
954         n_live = 0;
955         foreach_pset(live, irn)
956                 live_arr[n_live++] = irn;
957
958         /* dump the live after clique */
959         if(!env->dumb) {
960                 for(j = 0; j < n_live; ++j) {
961                         int k;
962
963                         for(k = j + 1; k < n_live; ++k) {
964                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
965                         }
966                         fprintf(env->f, "\n");
967                 }
968         }
969
970         /* dump the affinities */
971         for(j = 0; !env->dumb && j < n_live; ++j) {
972                 ir_node *irn = live_arr[j];
973                 int old_nr = PTR_TO_INT(get_irn_link(irn));
974
975                 /* if the node was already live in the last insn dump the affinity */
976                 if(old_nr > start_nr) {
977                         int weight = appel_aff_weight(env, bl);
978                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
979                 }
980         }
981
982         /* set the current numbers into the link field. */
983         for(j = 0; j < n_live; ++j) {
984                 ir_node *irn = live_arr[j];
985                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
986         }
987
988         return curr_nr + n_live;
989 }
990
991 static void appel_walker(ir_node *bl, void *data)
992 {
993         appel_clique_walker_t *env = data;
994         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
995         struct obstack *obst       = &env->obst;
996         void *base                 = obstack_base(obst);
997         pset *live                 = pset_new_ptr_default();
998
999         int n_insns  = 0;
1000         int n_nodes  = 0;
1001         int start_nr = env->curr_nr;
1002         int curr_nr  = start_nr;
1003
1004         be_insn_env_t insn_env;
1005         int i, j;
1006         ir_node *irn;
1007         be_insn_t **insns;
1008
1009         insn_env.aenv = env->co->aenv;
1010         insn_env.cls  = env->co->cls;
1011         insn_env.obst = obst;
1012         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1013
1014         /* Guess how many insns will be in this block. */
1015         sched_foreach(bl, irn)
1016                 n_nodes++;
1017
1018         bli->n_phi = 0;
1019         insns = malloc(n_nodes * sizeof(insns[0]));
1020
1021         /* Put all insns in an array. */
1022         irn = sched_first(bl);
1023         while(!sched_is_end(irn)) {
1024                 be_insn_t *insn;
1025                 insn = be_scan_insn(&insn_env, irn);
1026                 insns[n_insns++] = insn;
1027                 irn = insn->next_insn;
1028         }
1029
1030         DBG((env->co->cenv->dbg, LEVEL_2, "%+F\n", bl));
1031         be_liveness_end_of_block(env->co->cenv->lv, env->co->aenv, env->co->cls, bl, live);
1032
1033         /* Generate the bad and ugly. */
1034         for(i = n_insns - 1; i >= 0; --i) {
1035                 be_insn_t *insn = insns[i];
1036
1037                 /* The first live set has to be saved in the block border set. */
1038                 if(i == n_insns - 1) {
1039                         j = 0;
1040                         foreach_pset(live, irn) {
1041                                 bli->live_end[j]    = irn;
1042                                 bli->live_end_nr[j] = curr_nr + j;
1043                                 ++j;
1044                         }
1045                         bli->n_live_end = j;
1046                 }
1047
1048                 if(!env->dumb) {
1049                         for(j = 0; j < insn->use_start; ++j) {
1050                                 ir_node *op   = insn->ops[j].carrier;
1051                                 bitset_t *adm = insn->ops[j].regs;
1052                                 int k;
1053                                 int nr;
1054
1055                                 if(!insn->ops[j].has_constraints)
1056                                         continue;
1057
1058                                 nr = 0;
1059                                 foreach_pset(live, irn) {
1060                                         if(irn == op) {
1061                                                 pset_break(live);
1062                                                 break;
1063                                         }
1064                                         ++nr;
1065                                 }
1066
1067                                 assert(nr < pset_count(live));
1068
1069                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1070                                         int mapped_col = env->color_map[k];
1071                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1072                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1073                                 }
1074                         }
1075                 }
1076
1077                 /* dump the clique and update the stuff. */
1078                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1079
1080                 /* remove all defs. */
1081                 for(j = 0; j < insn->use_start; ++j)
1082                         pset_remove_ptr(live, insn->ops[j].carrier);
1083
1084                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1085                         bli->phi[bli->n_phi]    = insn->irn;
1086                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1087                         bli->n_phi++;
1088                 }
1089
1090                 /* add all uses */
1091                 else
1092                         for(j = insn->use_start; j < insn->n_ops; ++j)
1093                                 pset_insert_ptr(live, insn->ops[j].carrier);
1094         }
1095
1096         /* print the start clique. */
1097         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1098
1099         i = 0;
1100         foreach_pset(live, irn) {
1101                 bli->live_in[i]    = irn;
1102                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1103                 ++i;
1104         }
1105         bli->n_live_in = i;
1106
1107         del_pset(live);
1108         free(insns);
1109         obstack_free(obst, base);
1110         env->curr_nr = curr_nr;
1111 }
1112
1113 static void appel_inter_block_aff(ir_node *bl, void *data)
1114 {
1115         appel_clique_walker_t *env = data;
1116         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1117
1118         int i, j, n;
1119
1120         for(i = 0; i < bli->n_live_in; ++i) {
1121                 ir_node *irn = bli->live_in[i];
1122
1123                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1124                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1125
1126                         int nr = appel_get_live_end_nr(env, pred, irn);
1127                         assert(nr >= 0);
1128                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1129                 }
1130         }
1131
1132         for(i = 0; i < bli->n_phi; ++i) {
1133                 ir_node *irn = bli->phi[i];
1134
1135                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1136                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1137                         ir_node *op = get_irn_n(irn, j);
1138
1139                         int nr = appel_get_live_end_nr(env, pred, op);
1140                         assert(nr >= 0);
1141                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1142                 }
1143         }
1144
1145 }
1146
1147 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1148 {
1149         int i;
1150         int n_colors;
1151         appel_clique_walker_t env;
1152         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1153
1154         be_liveness_recompute(co->cenv->lv);
1155         obstack_init(&env.obst);
1156         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init);
1157         env.curr_nr = co->cls->n_regs;
1158         env.co = co;
1159         env.f = f;
1160
1161         bitset_copy(adm, co->cenv->ignore_colors);
1162         bitset_flip_all(adm);
1163
1164         /* Make color map. */
1165         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1166         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1167                 const arch_register_t *reg = &co->cls->regs[i];
1168                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1169         }
1170
1171         env.dumb = 1;
1172         env.curr_nr = n_colors;
1173         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1174         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1175
1176         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1177
1178         /* make the first k nodes interfere */
1179         for(i = 0; i < n_colors; ++i) {
1180                 int j;
1181                 for(j = i + 1; j < n_colors; ++j)
1182                         fprintf(f, "%d %d -1 ", i, j);
1183                 fprintf(f, "\n");
1184         }
1185
1186         env.dumb = 0;
1187         env.curr_nr = n_colors;
1188         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1189         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1190         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1191         obstack_free(&env.obst, NULL);
1192 }
1193
1194 /*
1195          ___ _____ ____   ____   ___ _____   ____                        _
1196         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1197          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1198          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1199         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1200                                                                   |_|            |___/
1201 */
1202
1203 static const char *get_dot_color_name(int col)
1204 {
1205         static const char *names[] = {
1206                 "blue",
1207                 "red",
1208                 "green",
1209                 "yellow",
1210                 "cyan",
1211                 "magenta",
1212                 "orange",
1213                 "chocolate",
1214                 "beige",
1215                 "navy",
1216                 "darkgreen",
1217                 "darkred",
1218                 "lightPink",
1219                 "chartreuse",
1220                 "lightskyblue",
1221                 "linen",
1222                 "pink",
1223                 "lightslateblue",
1224                 "mintcream",
1225                 "red",
1226                 "darkolivegreen",
1227                 "mediumblue",
1228                 "mistyrose",
1229                 "salmon",
1230                 "darkseagreen",
1231                 "mediumslateblue"
1232                 "moccasin",
1233                 "tomato",
1234                 "forestgreen",
1235                 "darkturquoise",
1236                 "palevioletred"
1237         };
1238
1239         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1240 }
1241
1242 typedef struct _co_ifg_dump_t {
1243         const copy_opt_t *co;
1244         unsigned flags;
1245 } co_ifg_dump_t;
1246
1247 static void ifg_dump_graph_attr(FILE *f, void *self)
1248 {
1249         fprintf(f, "overlap=scale");
1250 }
1251
1252 static int ifg_is_dump_node(void *self, ir_node *irn)
1253 {
1254         co_ifg_dump_t *cod = self;
1255         return !arch_irn_is(cod->co->aenv, irn, ignore);
1256 }
1257
1258 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1259 {
1260         co_ifg_dump_t *env         = self;
1261         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1262         arch_register_req_t req;
1263         int limited;
1264
1265         arch_get_register_req(env->co->aenv, &req, irn, BE_OUT_POS(0));
1266         limited = arch_register_req_is(&req, limited);
1267
1268         if(env->flags & CO_IFG_DUMP_LABELS) {
1269                 ir_fprintf(f, "label=\"%+F", irn);
1270
1271                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1272                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1273                         req.limited(req.limited_env, bs);
1274                         ir_fprintf(f, "\\n%B", bs);
1275                 }
1276                 ir_fprintf(f, "\" ");
1277         }
1278
1279         else
1280                 fprintf(f, "label=\"\" shape=point " );
1281
1282         if(env->flags & CO_IFG_DUMP_SHAPE)
1283                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1284
1285         if(env->flags & CO_IFG_DUMP_COLORS)
1286                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1287 }
1288
1289 static void ifg_dump_at_end(FILE *file, void *self)
1290 {
1291         co_ifg_dump_t *env = self;
1292         affinity_node_t *a;
1293
1294         co_gs_foreach_aff_node(env->co, a) {
1295                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1296                 unsigned aidx = get_irn_idx(a->irn);
1297                 neighb_t *n;
1298
1299                 co_gs_foreach_neighb(a, n) {
1300                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1301                         unsigned nidx = get_irn_idx(n->irn);
1302
1303                         if(aidx < nidx) {
1304                                 const char *color = nr == ar ? "blue" : "red";
1305                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1306                                 if(env->flags & CO_IFG_DUMP_LABELS)
1307                                         fprintf(file, "label=\"%d\" ", n->costs);
1308                                 if(env->flags & CO_IFG_DUMP_COLORS)
1309                                         fprintf(file, "color=%s ", color);
1310                                 else
1311                                         fprintf(file, "style=dotted");
1312                                 fprintf(file, "];\n");
1313                         }
1314                 }
1315         }
1316 }
1317
1318
1319 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1320         ifg_is_dump_node,
1321         ifg_dump_graph_attr,
1322         ifg_dump_node_attr,
1323         NULL,
1324         NULL,
1325         ifg_dump_at_end
1326 };
1327
1328
1329
1330 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1331 {
1332         co_ifg_dump_t cod;
1333
1334         cod.co    = co;
1335         cod.flags = flags;
1336         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1337 }
1338
1339
1340 void co_solve_park_moon(copy_opt_t *opt)
1341 {
1342
1343 }
1344
1345 static int void_algo(copy_opt_t *co)
1346 {
1347         return 0;
1348 }
1349
1350 /*
1351                 _    _                  _ _   _
1352            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1353           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1354          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1355         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1356                            |___/
1357 */
1358
1359 typedef struct {
1360         co_algo_t *algo;
1361         const char *name;
1362         int can_improve_existing;
1363 } co_algo_info_t;
1364
1365 static co_algo_info_t algos[] = {
1366         { void_algo,                "none",  0 },
1367         { co_solve_heuristic,       "heur1", 0 },
1368         { co_solve_heuristic_new,   "heur2", 0 },
1369         { co_solve_heuristic_java,  "heur3", 0 },
1370 #ifdef WITH_ILP
1371         { co_solve_ilp2,            "ilp",   1 },
1372 #endif
1373         { NULL,                     "",      0 }
1374 };
1375
1376 /*
1377     __  __       _         ____       _
1378    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1379    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1380    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1381    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1382
1383 */
1384
1385 void co_driver(be_chordal_env_t *cenv)
1386 {
1387 #ifdef WITH_LIBCORE
1388         lc_timer_t *timer = lc_timer_register("firm.be.copyopt", "runtime");
1389 #endif
1390         co_complete_stats_t before, after;
1391         copy_opt_t *co;
1392         co_algo_t  *algo_func;
1393         int was_optimal = 0;
1394
1395         if(algo < 0 || algo >= CO_ALGO_LAST)
1396                 return;
1397
1398         co = new_copy_opt(cenv, cost_func);
1399         co_build_ou_structure(co);
1400         co_build_graph_structure(co);
1401
1402         co_complete_stats(co, &before);
1403
1404         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1405         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1406         be_stat_ev_ull("co_max_costs",    before.max_costs);
1407         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1408         be_stat_ev_ull("co_aff_int",      before.aff_int);
1409
1410         be_stat_ev_ull("co_init_costs",   before.costs);
1411         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1412
1413         /* Dump the interference graph in Appel's format. */
1414         if(dump_flags & DUMP_APPEL) {
1415                 FILE *f = be_chordal_open(cenv, "", ".apl");
1416                 co_dump_appel_graph(co, f);
1417                 fclose(f);
1418         }
1419
1420         if(dump_flags & DUMP_BEFORE) {
1421                 FILE *f = be_chordal_open(cenv, "", "-before.dot");
1422                 co_dump_ifg_dot(co, f, style_flags);
1423                 fclose(f);
1424         }
1425
1426         /* if the algo can improve results, provide an initial solution with heur3 */
1427         if(improve && algos[algo].can_improve_existing) {
1428                 co_complete_stats_t stats;
1429
1430                 /* produce a heuristical solution */
1431                 co_solve_heuristic_java(co);
1432
1433                 /* do the stats and provide the current costs */
1434                 co_complete_stats(co, &stats);
1435                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1436         }
1437
1438         /* start the JVM here so that it does not tamper the timing. */
1439         if(algo == CO_ALGO_HEUR3)
1440                 be_java_coal_start_jvm();
1441
1442         algo_func = algos[algo].algo;
1443
1444 #ifdef WITH_LIBCORE
1445         lc_timer_reset_and_start(timer);
1446 #endif
1447
1448         was_optimal = algo_func(co);
1449
1450 #ifdef WITH_LIBCORE
1451         lc_timer_stop(timer);
1452         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1453 #endif
1454
1455         be_stat_ev_ull("co_optimal", was_optimal);
1456
1457         if(dump_flags & DUMP_AFTER) {
1458                 FILE *f = be_chordal_open(cenv, "", "-after.dot");
1459                 co_dump_ifg_dot(co, f, style_flags);
1460                 fclose(f);
1461         }
1462
1463         co_complete_stats(co, &after);
1464
1465         if(do_stats) {
1466                 int optimizable_costs = after.max_costs - after.inevit_costs;
1467                 int evitable          = after.costs     - after.inevit_costs;
1468
1469                 ir_printf("%30F ", cenv->irg);
1470                 printf("%10s %10llu%10llu%10llu", cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1471
1472                 if(optimizable_costs > 0)
1473                         printf("%10llu %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1474                 else
1475                         printf("%10llu %5s\n", after.costs, "-");
1476         }
1477
1478         be_stat_ev_ull("co_after_costs", after.costs);
1479         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1480
1481         co_free_graph_structure(co);
1482         co_free_ou_structure(co);
1483         free_copy_opt(co);
1484 }