70ebdfcac8f106ca64ab38da74a92be2fb83e804
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110 #ifdef WITH_JVM
111         { "heur3",  CO_ALGO_HEUR3 },
112 #endif /* WITH_JVM */
113         { "heur4",  CO_ALGO_HEUR4 },
114 #ifdef WITH_ILP
115         { "ilp",    CO_ALGO_ILP   },
116 #endif /* WITH_ILP */
117         { NULL,     0 }
118 };
119
120 typedef int (*opt_funcptr)(void);
121
122 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
123         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
124         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
125         { "one",    (opt_funcptr) co_get_costs_all_one },
126         { NULL,     NULL }
127 };
128
129 static lc_opt_enum_mask_var_t dump_var = {
130         &dump_flags, dump_items
131 };
132
133 static lc_opt_enum_mask_var_t style_var = {
134         &style_flags, style_items
135 };
136
137 static lc_opt_enum_mask_var_t algo_var = {
138         &algo, algo_items
139 };
140
141 static lc_opt_enum_func_ptr_var_t cost_func_var = {
142         (opt_funcptr*) &cost_func, cost_func_items
143 };
144
145 static const lc_opt_table_entry_t options[] = {
146         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
147         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
148         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
149         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
150         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
151         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
152         LC_OPT_LAST
153 };
154
155 /* Insert additional options registration functions here. */
156 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
157 extern void be_co2_register_options(lc_opt_entry_t *grp);
158 extern void be_co3_register_options(lc_opt_entry_t *grp);
159
160 void be_init_copycoal(void)
161 {
162         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
163         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
164         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
165         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
166
167         lc_opt_add_table(co_grp, options);
168 }
169
170 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
171
172 #undef QUICK_AND_DIRTY_HACK
173
174 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
175 {
176         if (env->ifg)
177                 return be_ifg_connected(env->ifg, a, b);
178         else
179                 return values_interfere(env->birg, a, b);
180 }
181
182
183 /******************************************************************************
184     _____                           _
185    / ____|                         | |
186   | |  __  ___ _ __   ___ _ __ __ _| |
187   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
188   | |__| |  __/ | | |  __/ | | (_| | |
189    \_____|\___|_| |_|\___|_|  \__,_|_|
190
191  ******************************************************************************/
192
193 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
194
195
196 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
197 {
198         const char *s1, *s2, *s3;
199         int len;
200         copy_opt_t *co;
201
202         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
203
204         co = xcalloc(1, sizeof(*co));
205         co->cenv      = chordal_env;
206         co->aenv      = chordal_env->birg->main_env->arch_env;
207         co->irg       = chordal_env->irg;
208         co->cls       = chordal_env->cls;
209         co->get_costs = get_costs;
210
211         s1 = get_irp_prog_name();
212         s2 = get_entity_name(get_irg_entity(co->irg));
213         s3 = chordal_env->cls->name;
214         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
215         co->name = xmalloc(len);
216         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
217
218         return co;
219 }
220
221 void free_copy_opt(copy_opt_t *co) {
222         xfree(co->name);
223         free(co);
224 }
225
226 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
227         const arch_register_req_t *req;
228         const arch_register_t *reg;
229
230         if (arch_irn_is(co->aenv, irn, ignore))
231                 return 0;
232
233         reg = arch_get_irn_register(co->aenv, irn);
234         if (arch_register_type_is(reg, ignore))
235                 return 0;
236
237         req = arch_get_register_req(co->aenv, irn, -1);
238         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
245         int cost = 0;
246         ir_loop *loop;
247         ir_node *root_block = get_nodes_block(root);
248         (void) co;
249         (void) arg;
250
251         if (is_Phi(root)) {
252                 /* for phis the copies are placed in the corresponding pred-block */
253                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
254         } else {
255                 /* a perm places the copy in the same block as it resides */
256                 loop = get_irn_loop(root_block);
257         }
258         if (loop) {
259                 int d = get_loop_depth(loop);
260                 cost = d*d;
261         }
262         return 1+cost;
263 }
264
265 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
266         int res;
267         ir_node *root_bl = get_nodes_block(root);
268         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
269         (void) arg;
270         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
271
272         /* don't allow values smaller than one. */
273         return res < 1 ? 1 : res;
274 }
275
276
277 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
278         (void) co;
279         (void) root;
280         (void) arg;
281         (void) pos;
282         return 1;
283 }
284
285 /******************************************************************************
286    ____        _   _    _       _ _          _____ _
287   / __ \      | | | |  | |     (_) |        / ____| |
288  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
289  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
290  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
291   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
292         | |                                                            __/ |
293         |_|                                                           |___/
294  ******************************************************************************/
295
296 /**
297  * Determines a maximum weighted independent set with respect to
298  * the interference and conflict edges of all nodes in a qnode.
299  */
300 static int ou_max_ind_set_costs(unit_t *ou) {
301         be_chordal_env_t *chordal_env = ou->co->cenv;
302         ir_node **safe, **unsafe;
303         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
304         bitset_t *curr;
305         bitset_pos_t pos;
306         int max, curr_weight, best_weight = 0;
307
308         /* assign the nodes into two groups.
309          * safe: node has no interference, hence it is in every max stable set.
310          * unsafe: node has an interference
311          */
312         safe = alloca((ou->node_count-1) * sizeof(*safe));
313         safe_costs = 0;
314         safe_count = 0;
315         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
316         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
317         unsafe_count = 0;
318         for(i=1; i<ou->node_count; ++i) {
319                 int is_safe = 1;
320                 for(o=1; o<ou->node_count; ++o) {
321                         if (i==o)
322                                 continue;
323                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
324                                 unsafe_costs[unsafe_count] = ou->costs[i];
325                                 unsafe[unsafe_count] = ou->nodes[i];
326                                 ++unsafe_count;
327                                 is_safe = 0;
328                                 break;
329                         }
330                 }
331                 if (is_safe) {
332                         safe_costs += ou->costs[i];
333                         safe[safe_count++] = ou->nodes[i];
334                 }
335         }
336
337
338         /* now compute the best set out of the unsafe nodes*/
339         if (unsafe_count > MIS_HEUR_TRIGGER) {
340                 bitset_t *best = bitset_alloca(unsafe_count);
341                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
342                 for (i=0; i<unsafe_count; ++i) {
343                         bitset_set(best, i);
344                         /* check if it is a stable set */
345                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
346                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
347                                         bitset_clear(best, i); /* clear the bit and try next one */
348                                         break;
349                                 }
350                 }
351                 /* compute the weight */
352                 bitset_foreach(best, pos)
353                         best_weight += unsafe_costs[pos];
354         } else {
355                 /* Exact Algorithm: Brute force */
356                 curr = bitset_alloca(unsafe_count);
357                 bitset_set_all(curr);
358                 while ((max = bitset_popcnt(curr)) != 0) {
359                         /* check if curr is a stable set */
360                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
361                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
362                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
363                                                         goto no_stable_set;
364
365                         /* if we arrive here, we have a stable set */
366                         /* compute the weigth of the stable set*/
367                         curr_weight = 0;
368                         bitset_foreach(curr, pos)
369                                 curr_weight += unsafe_costs[pos];
370
371                         /* any better ? */
372                         if (curr_weight > best_weight) {
373                                 best_weight = curr_weight;
374                         }
375
376         no_stable_set:
377                         bitset_minus1(curr);
378                 }
379         }
380
381         return safe_costs+best_weight;
382 }
383
384 static void co_collect_units(ir_node *irn, void *env) {
385         copy_opt_t *co = env;
386         unit_t *unit;
387
388         if (!is_curr_reg_class(co, irn))
389                 return;
390         if (!co_is_optimizable_root(co, irn))
391                 return;
392
393         /* Init a new unit */
394         unit = xcalloc(1, sizeof(*unit));
395         unit->co = co;
396         unit->node_count = 1;
397         INIT_LIST_HEAD(&unit->queue);
398
399         /* Phi with some/all of its arguments */
400         if (is_Reg_Phi(irn)) {
401                 int i, arity;
402
403                 /* init */
404                 arity = get_irn_arity(irn);
405                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
406                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
407                 unit->nodes[0] = irn;
408
409                 /* fill */
410                 for (i=0; i<arity; ++i) {
411                         int o, arg_pos;
412                         ir_node *arg = get_irn_n(irn, i);
413
414                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
415                         if (arg == irn)
416                                 continue;
417                         if (nodes_interfere(co->cenv, irn, arg)) {
418                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
419                                 continue;
420                         }
421
422                         /* Else insert the argument of the phi to the members of this ou */
423                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
424
425                         if (! arch_irn_is(co->aenv, arg, ignore)) {
426                                 /* Check if arg has occurred at a prior position in the arg/list */
427                                 arg_pos = 0;
428                                 for (o=1; o<unit->node_count; ++o) {
429                                         if (unit->nodes[o] == arg) {
430                                                 arg_pos = o;
431                                                 break;
432                                         }
433                                 }
434
435                                 if (!arg_pos) { /* a new argument */
436                                         /* insert node, set costs */
437                                         unit->nodes[unit->node_count] = arg;
438                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
439                                         unit->node_count++;
440                                 } else { /* arg has occurred before in same phi */
441                                         /* increase costs for existing arg */
442                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
443                                 }
444                         }
445                 }
446                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
447                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
448         } else
449
450         /* Proj of a perm with corresponding arg */
451         if (is_Perm_Proj(co->aenv, irn)) {
452                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
453                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
454                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
455                 unit->node_count = 2;
456                 unit->nodes[0] = irn;
457                 unit->nodes[1] = get_Perm_src(irn);
458                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
459         } else {
460                 const arch_register_req_t *req =
461                         arch_get_register_req(co->aenv, irn, -1);
462
463                 /* Src == Tgt of a 2-addr-code instruction */
464                 if (is_2addr_code(req)) {
465                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
466                         if (!arch_irn_is(co->aenv, other, ignore) &&
467                                         !nodes_interfere(co->cenv, irn, other)) {
468                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
469                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
470                                 unit->node_count = 2;
471                                 unit->nodes[0] = irn;
472                                 unit->nodes[1] = other;
473                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
474                         }
475                 } else {
476                         assert(0 && "This is not an optimizable node!");
477                 }
478         }
479
480         /* Insert the new unit at a position according to its costs */
481         if (unit->node_count > 1) {
482                 int i;
483                 struct list_head *tmp;
484
485                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
486                 for(i=1; i<unit->node_count; ++i) {
487                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
488                         unit->all_nodes_costs += unit->costs[i];
489                 }
490
491                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
492                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
493                 /* Insert the new ou according to its sort_key */
494                 tmp = &co->units;
495                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
496                         tmp = tmp->next;
497                 list_add(&unit->units, tmp);
498         } else {
499                 free(unit);
500         }
501 }
502
503 #ifdef QUICK_AND_DIRTY_HACK
504
505 static int compare_ous(const void *k1, const void *k2) {
506         const unit_t *u1 = *((const unit_t **) k1);
507         const unit_t *u2 = *((const unit_t **) k2);
508         int i, o, u1_has_constr, u2_has_constr;
509         arch_register_req_t req;
510         const arch_env_t *aenv = u1->co->aenv;
511
512         /* Units with constraints come first */
513         u1_has_constr = 0;
514         for (i=0; i<u1->node_count; ++i) {
515                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
516                 if (arch_register_req_is(&req, limited)) {
517                         u1_has_constr = 1;
518                         break;
519                 }
520         }
521
522         u2_has_constr = 0;
523         for (i=0; i<u2->node_count; ++i) {
524                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
525                 if (arch_register_req_is(&req, limited)) {
526                         u2_has_constr = 1;
527                         break;
528                 }
529         }
530
531         if (u1_has_constr != u2_has_constr)
532                 return u2_has_constr - u1_has_constr;
533
534         /* Now check, whether the two units are connected */
535 #if 0
536         for (i=0; i<u1->node_count; ++i)
537                 for (o=0; o<u2->node_count; ++o)
538                         if (u1->nodes[i] == u2->nodes[o])
539                                 return 0;
540 #endif
541
542         /* After all, the sort key decides. Greater keys come first. */
543         return u2->sort_key - u1->sort_key;
544
545 }
546
547 /**
548  * Sort the ou's according to constraints and their sort_key
549  */
550 static void co_sort_units(copy_opt_t *co) {
551         int i, count = 0, costs;
552         unit_t *ou, **ous;
553
554         /* get the number of ous, remove them form the list and fill the array */
555         list_for_each_entry(unit_t, ou, &co->units, units)
556                 count++;
557         ous = alloca(count * sizeof(*ous));
558
559         costs = co_get_max_copy_costs(co);
560
561         i = 0;
562         list_for_each_entry(unit_t, ou, &co->units, units)
563                 ous[i++] = ou;
564
565         INIT_LIST_HEAD(&co->units);
566
567         assert(count == i && list_empty(&co->units));
568
569         for (i=0; i<count; ++i)
570                 ir_printf("%+F\n", ous[i]->nodes[0]);
571
572         qsort(ous, count, sizeof(*ous), compare_ous);
573
574         ir_printf("\n\n");
575         for (i=0; i<count; ++i)
576                 ir_printf("%+F\n", ous[i]->nodes[0]);
577
578         /* reinsert into list in correct order */
579         for (i=0; i<count; ++i)
580                 list_add_tail(&ous[i]->units, &co->units);
581
582         assert(costs == co_get_max_copy_costs(co));
583 }
584 #endif
585
586 void co_build_ou_structure(copy_opt_t *co) {
587         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
588         INIT_LIST_HEAD(&co->units);
589         irg_walk_graph(co->irg, co_collect_units, NULL, co);
590 #ifdef QUICK_AND_DIRTY_HACK
591         co_sort_units(co);
592 #endif
593 }
594
595 void co_free_ou_structure(copy_opt_t *co) {
596         unit_t *curr, *tmp;
597         ASSERT_OU_AVAIL(co);
598         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
599                 xfree(curr->nodes);
600                 xfree(curr->costs);
601                 xfree(curr);
602         }
603         co->units.next = NULL;
604 }
605
606 /* co_solve_heuristic() is implemented in becopyheur.c */
607
608 int co_get_max_copy_costs(const copy_opt_t *co) {
609         int i, res = 0;
610         unit_t *curr;
611
612         ASSERT_OU_AVAIL(co);
613
614         list_for_each_entry(unit_t, curr, &co->units, units) {
615                 res += curr->inevitable_costs;
616                 for (i=1; i<curr->node_count; ++i)
617                         res += curr->costs[i];
618         }
619         return res;
620 }
621
622 int co_get_inevit_copy_costs(const copy_opt_t *co) {
623         int res = 0;
624         unit_t *curr;
625
626         ASSERT_OU_AVAIL(co);
627
628         list_for_each_entry(unit_t, curr, &co->units, units)
629                 res += curr->inevitable_costs;
630         return res;
631 }
632
633 int co_get_copy_costs(const copy_opt_t *co) {
634         int i, res = 0;
635         unit_t *curr;
636
637         ASSERT_OU_AVAIL(co);
638
639         list_for_each_entry(unit_t, curr, &co->units, units) {
640                 int root_col = get_irn_col(co, curr->nodes[0]);
641                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
642                 res += curr->inevitable_costs;
643                 for (i=1; i<curr->node_count; ++i) {
644                         int arg_col = get_irn_col(co, curr->nodes[i]);
645                         if (root_col != arg_col) {
646                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
647                                 res += curr->costs[i];
648                         }
649                 }
650         }
651         return res;
652 }
653
654 int co_get_lower_bound(const copy_opt_t *co) {
655         int res = 0;
656         unit_t *curr;
657
658         ASSERT_OU_AVAIL(co);
659
660         list_for_each_entry(unit_t, curr, &co->units, units)
661                 res += curr->inevitable_costs + curr->min_nodes_costs;
662         return res;
663 }
664
665 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
666 {
667         bitset_t *seen = bitset_irg_malloc(co->irg);
668         affinity_node_t *an;
669
670         memset(stat, 0, sizeof(stat[0]));
671
672         /* count affinity edges. */
673         co_gs_foreach_aff_node(co, an) {
674                 neighb_t *neigh;
675                 stat->aff_nodes += 1;
676                 bitset_add_irn(seen, an->irn);
677                 co_gs_foreach_neighb(an, neigh) {
678                         if(!bitset_contains_irn(seen, neigh->irn)) {
679                                 stat->aff_edges += 1;
680                                 stat->max_costs += neigh->costs;
681
682                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
683                                         stat->costs += neigh->costs;
684                                         stat->unsatisfied_edges += 1;
685                                 }
686
687                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
688                                         stat->aff_int += 1;
689                                         stat->inevit_costs += neigh->costs;
690                                 }
691
692                         }
693                 }
694         }
695
696         bitset_free(seen);
697 }
698
699 /******************************************************************************
700    _____                 _        _____ _
701   / ____|               | |      / ____| |
702  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
703  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
704  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
705   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
706                   | |                                       __/ |
707                   |_|                                      |___/
708  ******************************************************************************/
709
710 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
711         const affinity_node_t *n1 = k1;
712         const affinity_node_t *n2 = k2;
713         (void) size;
714
715         return (n1->irn != n2->irn);
716 }
717
718 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
719         affinity_node_t new_node, *node;
720         neighb_t        *nbr;
721         int             allocnew = 1;
722
723         new_node.irn        = n1;
724         new_node.degree     = 0;
725         new_node.neighbours = NULL;
726         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
727
728         for (nbr = node->neighbours; nbr; nbr = nbr->next)
729                 if (nbr->irn == n2) {
730                         allocnew = 0;
731                         break;
732                 }
733
734         /* if we did not find n2 in n1's neighbourhood insert it */
735         if (allocnew) {
736                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
737                 nbr->irn   = n2;
738                 nbr->costs = 0;
739                 nbr->next  = node->neighbours;
740
741                 node->neighbours = nbr;
742                 node->degree++;
743         }
744
745         /* now nbr points to n1's neighbour-entry of n2 */
746         nbr->costs += costs;
747 }
748
749 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
750         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
751                 add_edge(co, n1, n2, costs);
752                 add_edge(co, n2, n1, costs);
753         }
754 }
755
756 static void build_graph_walker(ir_node *irn, void *env) {
757         copy_opt_t *co = env;
758         int pos, max;
759         const arch_register_t *reg;
760
761         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
762                 return;
763
764         reg = arch_get_irn_register(co->aenv, irn);
765         if (arch_register_type_is(reg, ignore))
766                 return;
767
768         /* Phis */
769         if (is_Reg_Phi(irn))
770                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
771                         ir_node *arg = get_irn_n(irn, pos);
772                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
773                 }
774
775         /* Perms */
776         else if (is_Perm_Proj(co->aenv, irn)) {
777                 ir_node *arg = get_Perm_src(irn);
778                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
779         }
780
781         /* 2-address code */
782         else {
783                 const arch_register_req_t *req =
784                         arch_get_register_req(co->aenv, irn, -1);
785                 if (is_2addr_code(req)) {
786                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same);
787                         if (! arch_irn_is(co->aenv, other, ignore))
788                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
789                 }
790         }
791 }
792
793 void co_build_graph_structure(copy_opt_t *co) {
794         obstack_init(&co->obst);
795         co->nodes = new_set(compare_affinity_node_t, 32);
796
797         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
798 }
799
800 void co_free_graph_structure(copy_opt_t *co) {
801         ASSERT_GS_AVAIL(co);
802
803         del_set(co->nodes);
804         obstack_free(&co->obst, NULL);
805         co->nodes = NULL;
806 }
807
808 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
809
810 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
811         affinity_node_t new_node, *n;
812
813         ASSERT_GS_AVAIL(co);
814
815         new_node.irn = irn;
816         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
817         if (n) {
818                 return (n->degree > 0);
819         } else
820                 return 0;
821 }
822
823 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
824 {
825         be_ifg_t *ifg  = co->cenv->ifg;
826         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
827
828         ir_node *irn;
829         void *it, *nit;
830         int i, n, n_regs;
831
832         n_regs = 0;
833         for(i = 0; i < co->cls->n_regs; ++i) {
834                 const arch_register_t *reg = &co->cls->regs[i];
835                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
836         }
837
838         /*
839          * n contains the first node number.
840          * the values below n are the pre-colored register nodes
841          */
842
843         it  = be_ifg_nodes_iter_alloca(ifg);
844         nit = be_ifg_neighbours_iter_alloca(ifg);
845
846         n = n_regs;
847         be_ifg_foreach_node(ifg, it, irn) {
848                 if(!arch_irn_is(co->aenv, irn, ignore))
849                         set_irn_link(irn, INT_TO_PTR(n++));
850         }
851
852         fprintf(f, "%d %d\n", n, n_regs);
853
854         be_ifg_foreach_node(ifg, it, irn) {
855                 if(!arch_irn_is(co->aenv, irn, ignore)) {
856                         int idx            = PTR_TO_INT(get_irn_link(irn));
857                         affinity_node_t *a = get_affinity_info(co, irn);
858
859                         const arch_register_req_t *req;
860                         ir_node *adj;
861
862                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
863                         if(arch_register_req_is(req, limited)) {
864                                 for(i = 0; i < co->cls->n_regs; ++i) {
865                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
866                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
867                                 }
868                         }
869
870
871                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
872                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
873                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
874                                         if(idx < adj_idx)
875                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
876                                 }
877                         }
878
879                         if(a) {
880                                 neighb_t *n;
881
882                                 co_gs_foreach_neighb(a, n) {
883                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
884                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
885                                                 if(idx < n_idx)
886                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
887                                         }
888                                 }
889                         }
890                 }
891         }
892 }
893
894 typedef struct _appel_clique_walker_t {
895         ir_phase ph;
896         const copy_opt_t *co;
897         int curr_nr;
898         int node_count;
899         FILE *f;
900         int dumb;
901         int *color_map;
902         struct obstack obst;
903 } appel_clique_walker_t;
904
905 typedef struct _appel_block_info_t {
906         int *live_end_nr;
907         int *live_in_nr;
908         int *phi_nr;
909         ir_node **live_end;
910         ir_node **live_in;
911         ir_node **phi;
912         int n_live_end;
913         int n_live_in;
914         int n_phi;
915 } appel_block_info_t;
916
917 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
918 {
919 #if 0
920         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
921         int res = (int) freq;
922         return res == 0 ? 1 : res;
923 #else
924         ir_loop *loop = get_irn_loop(bl);
925         (void) env;
926         if(loop) {
927                 int d = get_loop_depth(loop);
928                 return 1 + d * d;
929         }
930         return 1;
931 #endif
932 }
933
934 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
935 {
936         appel_block_info_t *res = NULL;
937         (void) old;
938
939         if(is_Block(irn)) {
940                 appel_clique_walker_t *d = (void *) phase;
941                 res = phase_alloc(phase, sizeof(res[0]));
942                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
943                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
944                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
945                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
946                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
947                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
948         }
949
950         return res;
951 }
952
953 typedef struct _insn_list_t {
954         be_insn_t *insn;
955         struct list_head list;
956 } insn_list_t;
957
958 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
959 {
960         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
961         int i;
962
963         for(i = 0; i < bli->n_live_end; ++i)
964                 if(bli->live_end[i] == irn)
965                         return bli->live_end_nr[i];
966
967         return -1;
968 }
969
970 static int appel_dump_clique(appel_clique_walker_t *env, const ir_nodeset_t *live, ir_node *bl, int curr_nr, int start_nr)
971 {
972         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
973         ir_node *irn;
974         int n_live;
975         int j;
976         ir_nodeset_iterator_t iter;
977
978         n_live = 0;
979         foreach_ir_nodeset(live, irn, iter)
980                 live_arr[n_live++] = irn;
981
982         /* dump the live after clique */
983         if(!env->dumb) {
984                 for(j = 0; j < n_live; ++j) {
985                         int k;
986
987                         for(k = j + 1; k < n_live; ++k) {
988                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
989                         }
990                         fprintf(env->f, "\n");
991                 }
992         }
993
994         /* dump the affinities */
995         for(j = 0; !env->dumb && j < n_live; ++j) {
996                 ir_node *irn = live_arr[j];
997                 int old_nr = PTR_TO_INT(get_irn_link(irn));
998
999                 /* if the node was already live in the last insn dump the affinity */
1000                 if(old_nr > start_nr) {
1001                         int weight = appel_aff_weight(env, bl);
1002                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1003                 }
1004         }
1005
1006         /* set the current numbers into the link field. */
1007         for(j = 0; j < n_live; ++j) {
1008                 ir_node *irn = live_arr[j];
1009                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1010         }
1011
1012         return curr_nr + n_live;
1013 }
1014
1015 static void appel_walker(ir_node *bl, void *data)
1016 {
1017         appel_clique_walker_t *env = data;
1018         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1019         struct obstack *obst       = &env->obst;
1020         void *base                 = obstack_base(obst);
1021         ir_nodeset_t live;
1022         ir_nodeset_iterator_t iter;
1023         be_lv_t *lv                = env->co->cenv->birg->lv;
1024
1025         int n_insns  = 0;
1026         int n_nodes  = 0;
1027         int start_nr = env->curr_nr;
1028         int curr_nr  = start_nr;
1029
1030         be_insn_env_t insn_env;
1031         int i, j;
1032         ir_node *irn;
1033         be_insn_t **insns;
1034
1035         insn_env.aenv = env->co->aenv;
1036         insn_env.cls  = env->co->cls;
1037         insn_env.obst = obst;
1038         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1039
1040         /* Guess how many insns will be in this block. */
1041         sched_foreach(bl, irn)
1042                 n_nodes++;
1043
1044         bli->n_phi = 0;
1045         insns = xmalloc(n_nodes * sizeof(insns[0]));
1046
1047         /* Put all insns in an array. */
1048         irn = sched_first(bl);
1049         while(!sched_is_end(irn)) {
1050                 be_insn_t *insn;
1051                 insn = be_scan_insn(&insn_env, irn);
1052                 insns[n_insns++] = insn;
1053                 irn = insn->next_insn;
1054         }
1055
1056         DBG((dbg, LEVEL_2, "%+F\n", bl));
1057         ir_nodeset_init(&live);
1058         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, &live);
1059
1060         /* Generate the bad and ugly. */
1061         for(i = n_insns - 1; i >= 0; --i) {
1062                 be_insn_t *insn = insns[i];
1063
1064                 /* The first live set has to be saved in the block border set. */
1065                 if(i == n_insns - 1) {
1066                         j = 0;
1067                         foreach_ir_nodeset(&live, irn, iter) {
1068                                 bli->live_end[j]    = irn;
1069                                 bli->live_end_nr[j] = curr_nr + j;
1070                                 ++j;
1071                         }
1072                         bli->n_live_end = j;
1073                 }
1074
1075                 if(!env->dumb) {
1076                         for(j = 0; j < insn->use_start; ++j) {
1077                                 ir_node *op   = insn->ops[j].carrier;
1078                                 bitset_t *adm = insn->ops[j].regs;
1079                                 int k;
1080                                 size_t nr;
1081
1082                                 if(!insn->ops[j].has_constraints)
1083                                         continue;
1084
1085                                 nr = 0;
1086                                 foreach_ir_nodeset(&live, irn, iter) {
1087                                         if(irn == op) {
1088                                                 break;
1089                                         }
1090                                         ++nr;
1091                                 }
1092
1093                                 assert(nr < ir_nodeset_size(&live));
1094
1095                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1096                                         int mapped_col = env->color_map[k];
1097                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1098                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1099                                 }
1100                         }
1101                 }
1102
1103                 /* dump the clique and update the stuff. */
1104                 curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1105
1106                 /* remove all defs. */
1107                 for(j = 0; j < insn->use_start; ++j)
1108                         ir_nodeset_remove(&live, insn->ops[j].carrier);
1109
1110                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1111                         bli->phi[bli->n_phi]    = insn->irn;
1112                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1113                         bli->n_phi++;
1114                 }
1115
1116                 /* add all uses */
1117                 else
1118                         for(j = insn->use_start; j < insn->n_ops; ++j)
1119                                 ir_nodeset_insert(&live, insn->ops[j].carrier);
1120         }
1121
1122         /* print the start clique. */
1123         curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1124
1125         i = 0;
1126         foreach_ir_nodeset(&live, irn, iter) {
1127                 bli->live_in[i]    = irn;
1128                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1129                 ++i;
1130         }
1131         bli->n_live_in = i;
1132
1133         ir_nodeset_destroy(&live);
1134         free(insns);
1135         obstack_free(obst, base);
1136         env->curr_nr = curr_nr;
1137 }
1138
1139 static void appel_inter_block_aff(ir_node *bl, void *data)
1140 {
1141         appel_clique_walker_t *env = data;
1142         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1143
1144         int i, j, n;
1145
1146         for(i = 0; i < bli->n_live_in; ++i) {
1147                 ir_node *irn = bli->live_in[i];
1148
1149                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1150                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1151
1152                         int nr = appel_get_live_end_nr(env, pred, irn);
1153                         assert(nr >= 0);
1154                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1155                 }
1156         }
1157
1158         for(i = 0; i < bli->n_phi; ++i) {
1159                 ir_node *irn = bli->phi[i];
1160
1161                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1162                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1163                         ir_node *op = get_irn_n(irn, j);
1164
1165                         int nr = appel_get_live_end_nr(env, pred, op);
1166                         assert(nr >= 0);
1167                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1168                 }
1169         }
1170
1171 }
1172
1173 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1174 {
1175         int i;
1176         int n_colors;
1177         appel_clique_walker_t env;
1178         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1179         be_lv_t *lv = co->cenv->birg->lv;
1180
1181         be_liveness_recompute(lv);
1182         obstack_init(&env.obst);
1183         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1184         env.curr_nr = co->cls->n_regs;
1185         env.co = co;
1186         env.f = f;
1187
1188         bitset_copy(adm, co->cenv->ignore_colors);
1189         bitset_flip_all(adm);
1190
1191         /* Make color map. */
1192         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1193         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1194                 const arch_register_t *reg = &co->cls->regs[i];
1195                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1196         }
1197
1198         env.dumb = 1;
1199         env.curr_nr = n_colors;
1200         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1201         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1202
1203         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1204
1205         /* make the first k nodes interfere */
1206         for(i = 0; i < n_colors; ++i) {
1207                 int j;
1208                 for(j = i + 1; j < n_colors; ++j)
1209                         fprintf(f, "%d %d -1 ", i, j);
1210                 fprintf(f, "\n");
1211         }
1212
1213         env.dumb = 0;
1214         env.curr_nr = n_colors;
1215         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1216         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1217         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1218         obstack_free(&env.obst, NULL);
1219 }
1220
1221 /*
1222          ___ _____ ____   ____   ___ _____   ____                        _
1223         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1224          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1225          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1226         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1227                                                                   |_|            |___/
1228 */
1229
1230 static const char *get_dot_color_name(size_t col)
1231 {
1232         static const char *names[] = {
1233                 "blue",
1234                 "red",
1235                 "green",
1236                 "yellow",
1237                 "cyan",
1238                 "magenta",
1239                 "orange",
1240                 "chocolate",
1241                 "beige",
1242                 "navy",
1243                 "darkgreen",
1244                 "darkred",
1245                 "lightPink",
1246                 "chartreuse",
1247                 "lightskyblue",
1248                 "linen",
1249                 "pink",
1250                 "lightslateblue",
1251                 "mintcream",
1252                 "red",
1253                 "darkolivegreen",
1254                 "mediumblue",
1255                 "mistyrose",
1256                 "salmon",
1257                 "darkseagreen",
1258                 "mediumslateblue"
1259                 "moccasin",
1260                 "tomato",
1261                 "forestgreen",
1262                 "darkturquoise",
1263                 "palevioletred"
1264         };
1265
1266         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1267 }
1268
1269 typedef struct _co_ifg_dump_t {
1270         const copy_opt_t *co;
1271         unsigned flags;
1272 } co_ifg_dump_t;
1273
1274 static void ifg_dump_graph_attr(FILE *f, void *self)
1275 {
1276         (void) self;
1277         fprintf(f, "overlap=scale");
1278 }
1279
1280 static int ifg_is_dump_node(void *self, ir_node *irn)
1281 {
1282         co_ifg_dump_t *cod = self;
1283         return !arch_irn_is(cod->co->aenv, irn, ignore);
1284 }
1285
1286 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1287 {
1288         co_ifg_dump_t *env         = self;
1289         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1290         const arch_register_req_t *req;
1291         int limited;
1292
1293         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1294         limited = arch_register_req_is(req, limited);
1295
1296         if(env->flags & CO_IFG_DUMP_LABELS) {
1297                 ir_fprintf(f, "label=\"%+F", irn);
1298
1299 #if 0
1300                 // TODO fix this...
1301                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1302                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1303                         req.limited(req.limited_env, bs);
1304                         ir_fprintf(f, "\\n%B", bs);
1305                 }
1306 #endif
1307                 ir_fprintf(f, "\" ");
1308         } else {
1309                 fprintf(f, "label=\"\" shape=point " );
1310         }
1311
1312         if(env->flags & CO_IFG_DUMP_SHAPE)
1313                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1314
1315         if(env->flags & CO_IFG_DUMP_COLORS)
1316                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1317 }
1318
1319 static void ifg_dump_at_end(FILE *file, void *self)
1320 {
1321         co_ifg_dump_t *env = self;
1322         affinity_node_t *a;
1323
1324         co_gs_foreach_aff_node(env->co, a) {
1325                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1326                 unsigned aidx = get_irn_idx(a->irn);
1327                 neighb_t *n;
1328
1329                 co_gs_foreach_neighb(a, n) {
1330                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1331                         unsigned nidx = get_irn_idx(n->irn);
1332
1333                         if(aidx < nidx) {
1334                                 const char *color = nr == ar ? "blue" : "red";
1335                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1336                                 if(env->flags & CO_IFG_DUMP_LABELS)
1337                                         fprintf(file, "label=\"%d\" ", n->costs);
1338                                 if(env->flags & CO_IFG_DUMP_COLORS)
1339                                         fprintf(file, "color=%s ", color);
1340                                 else
1341                                         fprintf(file, "style=dotted");
1342                                 fprintf(file, "];\n");
1343                         }
1344                 }
1345         }
1346 }
1347
1348
1349 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1350         ifg_is_dump_node,
1351         ifg_dump_graph_attr,
1352         ifg_dump_node_attr,
1353         NULL,
1354         NULL,
1355         ifg_dump_at_end
1356 };
1357
1358
1359
1360 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1361 {
1362         co_ifg_dump_t cod;
1363
1364         cod.co    = co;
1365         cod.flags = flags;
1366         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1367 }
1368
1369
1370 void co_solve_park_moon(copy_opt_t *opt)
1371 {
1372         (void) opt;
1373 }
1374
1375 static int void_algo(copy_opt_t *co)
1376 {
1377         (void) co;
1378         return 0;
1379 }
1380
1381 /*
1382                 _    _                  _ _   _
1383            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1384           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1385          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1386         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1387                            |___/
1388 */
1389
1390 typedef struct {
1391         co_algo_t  *algo;
1392         const char *name;
1393         int        can_improve_existing;
1394 } co_algo_info_t;
1395
1396 static co_algo_info_t algos[] = {
1397         { void_algo,               "none",  0 },
1398         { co_solve_heuristic,      "heur1", 0 },
1399         { co_solve_heuristic_new,  "heur2", 0 },
1400         { co_solve_heuristic_java, "heur3", 0 },
1401         { co_solve_heuristic_mst,  "heur4", 0 },
1402 #ifdef WITH_ILP
1403         { co_solve_ilp2,           "ilp",   1 },
1404 #endif
1405         { NULL,                    "",      0 }
1406 };
1407
1408 /*
1409     __  __       _         ____       _
1410    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1411    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1412    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1413    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1414
1415 */
1416
1417 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1418 {
1419         FILE *result;
1420         char buf[1024];
1421
1422         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1423         result = fopen(buf, "wt");
1424         if(result == NULL) {
1425                 panic("Couldn't open '%s' for writing.", buf);
1426         }
1427
1428         return result;
1429 }
1430
1431 void co_driver(be_chordal_env_t *cenv)
1432 {
1433         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1434         co_complete_stats_t before, after;
1435         copy_opt_t          *co;
1436         co_algo_t           *algo_func;
1437         int                 was_optimal = 0;
1438
1439         if (algo >= CO_ALGO_LAST)
1440                 return;
1441
1442         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1443
1444         co = new_copy_opt(cenv, cost_func);
1445         co_build_ou_structure(co);
1446         co_build_graph_structure(co);
1447
1448         co_complete_stats(co, &before);
1449
1450         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1451         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1452         be_stat_ev_ull("co_max_costs",    before.max_costs);
1453         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1454         be_stat_ev_ull("co_aff_int",      before.aff_int);
1455
1456         be_stat_ev_ull("co_init_costs",   before.costs);
1457         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1458
1459         /* Dump the interference graph in Appel's format. */
1460         if (dump_flags & DUMP_APPEL) {
1461                 FILE *f = my_open(cenv, "", ".apl");
1462                 co_dump_appel_graph(co, f);
1463                 fclose(f);
1464         }
1465
1466         if (dump_flags & DUMP_BEFORE) {
1467                 FILE *f = my_open(cenv, "", "-before.dot");
1468                 co_dump_ifg_dot(co, f, style_flags);
1469                 fclose(f);
1470         }
1471
1472         /* if the algo can improve results, provide an initial solution with heur3 */
1473         if (improve && algos[algo].can_improve_existing) {
1474                 co_complete_stats_t stats;
1475
1476                 /* produce a heuristic solution */
1477 #ifdef WITH_JVM
1478                 co_solve_heuristic_java(co);
1479 #else
1480                 co_solve_heuristic(co);
1481 #endif /* WITH_JVM */
1482
1483                 /* do the stats and provide the current costs */
1484                 co_complete_stats(co, &stats);
1485                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1486         }
1487
1488 #ifdef WITH_JVM
1489         /* start the JVM here so that it does not tamper the timing. */
1490         if (algo == CO_ALGO_HEUR3)
1491                 be_java_coal_start_jvm();
1492 #endif /* WITH_JVM */
1493
1494         algo_func = algos[algo].algo;
1495
1496         /* perform actual copy minimization */
1497         lc_timer_reset_and_start(timer);
1498         was_optimal = algo_func(co);
1499         lc_timer_stop(timer);
1500
1501         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1502         be_stat_ev_ull("co_optimal", was_optimal);
1503
1504         if (dump_flags & DUMP_AFTER) {
1505                 FILE *f = my_open(cenv, "", "-after.dot");
1506                 co_dump_ifg_dot(co, f, style_flags);
1507                 fclose(f);
1508         }
1509
1510         co_complete_stats(co, &after);
1511
1512         if (do_stats) {
1513                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1514                 ulong64 evitable          = after.costs     - after.inevit_costs;
1515
1516                 ir_printf("%30F ", cenv->irg);
1517                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1518
1519                 if(optimizable_costs > 0)
1520                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1521                 else
1522                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1523         }
1524
1525         be_stat_ev_ull("co_after_costs", after.costs);
1526         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1527
1528         co_free_graph_structure(co);
1529         co_free_ou_structure(co);
1530         free_copy_opt(co);
1531 }