Inline gen_Minus_ex() into its only caller gen_Minus().
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110 #ifdef WITH_JVM
111         { "heur3",  CO_ALGO_HEUR3 },
112 #endif /* WITH_JVM */
113         { "heur4",  CO_ALGO_HEUR4 },
114 #ifdef WITH_ILP
115         { "ilp",    CO_ALGO_ILP   },
116 #endif /* WITH_ILP */
117         { NULL,     0 }
118 };
119
120 typedef int (*opt_funcptr)(void);
121
122 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
123         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
124         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
125         { "one",    (opt_funcptr) co_get_costs_all_one },
126         { NULL,     NULL }
127 };
128
129 static lc_opt_enum_mask_var_t dump_var = {
130         &dump_flags, dump_items
131 };
132
133 static lc_opt_enum_mask_var_t style_var = {
134         &style_flags, style_items
135 };
136
137 static lc_opt_enum_mask_var_t algo_var = {
138         &algo, algo_items
139 };
140
141 static lc_opt_enum_func_ptr_var_t cost_func_var = {
142         (opt_funcptr*) &cost_func, cost_func_items
143 };
144
145 static const lc_opt_table_entry_t options[] = {
146         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
147         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
148         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
149         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
150         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
151         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
152         LC_OPT_LAST
153 };
154
155 /* Insert additional options registration functions here. */
156 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
157 extern void be_co2_register_options(lc_opt_entry_t *grp);
158 extern void be_co3_register_options(lc_opt_entry_t *grp);
159
160 void be_init_copycoal(void)
161 {
162         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
163         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
164         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
165         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
166
167         lc_opt_add_table(co_grp, options);
168 }
169
170 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
171
172 #undef QUICK_AND_DIRTY_HACK
173
174 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
175 {
176         if (env->ifg)
177                 return be_ifg_connected(env->ifg, a, b);
178         else
179                 return values_interfere(env->birg, a, b);
180 }
181
182
183 /******************************************************************************
184     _____                           _
185    / ____|                         | |
186   | |  __  ___ _ __   ___ _ __ __ _| |
187   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
188   | |__| |  __/ | | |  __/ | | (_| | |
189    \_____|\___|_| |_|\___|_|  \__,_|_|
190
191  ******************************************************************************/
192
193 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
194
195
196 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
197 {
198         const char *s1, *s2, *s3;
199         int len;
200         copy_opt_t *co;
201
202         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
203
204         co = xcalloc(1, sizeof(*co));
205         co->cenv      = chordal_env;
206         co->aenv      = chordal_env->birg->main_env->arch_env;
207         co->irg       = chordal_env->irg;
208         co->cls       = chordal_env->cls;
209         co->get_costs = get_costs;
210
211         s1 = get_irp_prog_name();
212         s2 = get_entity_name(get_irg_entity(co->irg));
213         s3 = chordal_env->cls->name;
214         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
215         co->name = xmalloc(len);
216         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
217
218         return co;
219 }
220
221 void free_copy_opt(copy_opt_t *co) {
222         xfree(co->name);
223         free(co);
224 }
225
226 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
227         const arch_register_req_t *req;
228         const arch_register_t *reg;
229
230         if (arch_irn_is(co->aenv, irn, ignore))
231                 return 0;
232
233         reg = arch_get_irn_register(co->aenv, irn);
234         if (arch_register_type_is(reg, ignore))
235                 return 0;
236
237         req = arch_get_register_req(co->aenv, irn, -1);
238         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
245         int cost = 0;
246         ir_loop *loop;
247         ir_node *root_block = get_nodes_block(root);
248         (void) co;
249         (void) arg;
250
251         if (is_Phi(root)) {
252                 /* for phis the copies are placed in the corresponding pred-block */
253                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
254         } else {
255                 /* a perm places the copy in the same block as it resides */
256                 loop = get_irn_loop(root_block);
257         }
258         if (loop) {
259                 int d = get_loop_depth(loop);
260                 cost = d*d;
261         }
262         return 1+cost;
263 }
264
265 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
266         int res;
267         ir_node *root_bl = get_nodes_block(root);
268         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
269         (void) arg;
270         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
271
272         /* don't allow values smaller than one. */
273         return res < 1 ? 1 : res;
274 }
275
276
277 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
278         (void) co;
279         (void) root;
280         (void) arg;
281         (void) pos;
282         return 1;
283 }
284
285 /******************************************************************************
286    ____        _   _    _       _ _          _____ _
287   / __ \      | | | |  | |     (_) |        / ____| |
288  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
289  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
290  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
291   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
292         | |                                                            __/ |
293         |_|                                                           |___/
294  ******************************************************************************/
295
296 /**
297  * Determines a maximum weighted independent set with respect to
298  * the interference and conflict edges of all nodes in a qnode.
299  */
300 static int ou_max_ind_set_costs(unit_t *ou) {
301         be_chordal_env_t *chordal_env = ou->co->cenv;
302         ir_node **safe, **unsafe;
303         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
304         bitset_t *curr;
305         bitset_pos_t pos;
306         int max, curr_weight, best_weight = 0;
307
308         /* assign the nodes into two groups.
309          * safe: node has no interference, hence it is in every max stable set.
310          * unsafe: node has an interference
311          */
312         safe = alloca((ou->node_count-1) * sizeof(*safe));
313         safe_costs = 0;
314         safe_count = 0;
315         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
316         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
317         unsafe_count = 0;
318         for(i=1; i<ou->node_count; ++i) {
319                 int is_safe = 1;
320                 for(o=1; o<ou->node_count; ++o) {
321                         if (i==o)
322                                 continue;
323                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
324                                 unsafe_costs[unsafe_count] = ou->costs[i];
325                                 unsafe[unsafe_count] = ou->nodes[i];
326                                 ++unsafe_count;
327                                 is_safe = 0;
328                                 break;
329                         }
330                 }
331                 if (is_safe) {
332                         safe_costs += ou->costs[i];
333                         safe[safe_count++] = ou->nodes[i];
334                 }
335         }
336
337
338         /* now compute the best set out of the unsafe nodes*/
339         if (unsafe_count > MIS_HEUR_TRIGGER) {
340                 bitset_t *best = bitset_alloca(unsafe_count);
341                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
342                 for (i=0; i<unsafe_count; ++i) {
343                         bitset_set(best, i);
344                         /* check if it is a stable set */
345                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
346                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
347                                         bitset_clear(best, i); /* clear the bit and try next one */
348                                         break;
349                                 }
350                 }
351                 /* compute the weight */
352                 bitset_foreach(best, pos)
353                         best_weight += unsafe_costs[pos];
354         } else {
355                 /* Exact Algorithm: Brute force */
356                 curr = bitset_alloca(unsafe_count);
357                 bitset_set_all(curr);
358                 while ((max = bitset_popcnt(curr)) != 0) {
359                         /* check if curr is a stable set */
360                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
361                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
362                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
363                                                         goto no_stable_set;
364
365                         /* if we arrive here, we have a stable set */
366                         /* compute the weigth of the stable set*/
367                         curr_weight = 0;
368                         bitset_foreach(curr, pos)
369                                 curr_weight += unsafe_costs[pos];
370
371                         /* any better ? */
372                         if (curr_weight > best_weight) {
373                                 best_weight = curr_weight;
374                         }
375
376         no_stable_set:
377                         bitset_minus1(curr);
378                 }
379         }
380
381         return safe_costs+best_weight;
382 }
383
384 static void co_collect_units(ir_node *irn, void *env) {
385         copy_opt_t *co = env;
386         unit_t *unit;
387
388         if (!is_curr_reg_class(co, irn))
389                 return;
390         if (!co_is_optimizable_root(co, irn))
391                 return;
392
393         /* Init a new unit */
394         unit = xcalloc(1, sizeof(*unit));
395         unit->co = co;
396         unit->node_count = 1;
397         INIT_LIST_HEAD(&unit->queue);
398
399         /* Phi with some/all of its arguments */
400         if (is_Reg_Phi(irn)) {
401                 int i, arity;
402
403                 /* init */
404                 arity = get_irn_arity(irn);
405                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
406                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
407                 unit->nodes[0] = irn;
408
409                 /* fill */
410                 for (i=0; i<arity; ++i) {
411                         int o, arg_pos;
412                         ir_node *arg = get_irn_n(irn, i);
413
414                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
415                         if (arg == irn)
416                                 continue;
417                         if (nodes_interfere(co->cenv, irn, arg)) {
418                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
419                                 continue;
420                         }
421
422                         /* Else insert the argument of the phi to the members of this ou */
423                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
424
425                         if (! arch_irn_is(co->aenv, arg, ignore)) {
426                                 /* Check if arg has occurred at a prior position in the arg/list */
427                                 arg_pos = 0;
428                                 for (o=1; o<unit->node_count; ++o) {
429                                         if (unit->nodes[o] == arg) {
430                                                 arg_pos = o;
431                                                 break;
432                                         }
433                                 }
434
435                                 if (!arg_pos) { /* a new argument */
436                                         /* insert node, set costs */
437                                         unit->nodes[unit->node_count] = arg;
438                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
439                                         unit->node_count++;
440                                 } else { /* arg has occurred before in same phi */
441                                         /* increase costs for existing arg */
442                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
443                                 }
444                         }
445                 }
446                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
447                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
448         } else if (is_Perm_Proj(co->aenv, irn)) {
449                 /* Proj of a perm with corresponding arg */
450                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
451                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
452                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
453                 unit->node_count = 2;
454                 unit->nodes[0] = irn;
455                 unit->nodes[1] = get_Perm_src(irn);
456                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
457         } else {
458                 const arch_register_req_t *req =
459                         arch_get_register_req(co->aenv, irn, -1);
460
461                 /* Src == Tgt of a 2-addr-code instruction */
462                 if (is_2addr_code(req)) {
463                         ir_node *other  = get_irn_n(skip_Proj(irn), req->other_same[0]);
464                         ir_node *other2 = NULL;
465                         int      count;
466
467                         if (arch_irn_is(co->aenv, other, ignore) ||
468                                         nodes_interfere(co->cenv, irn, other)) {
469                                 other = NULL;
470                         }
471                         if (req->other_same[1] != -1) {
472                                 other2 = get_irn_n(skip_Proj(irn), req->other_same[1]);
473                                 if (arch_irn_is(co->aenv, other2, ignore) ||
474                                                 nodes_interfere(co->cenv, irn, other2)) {
475                                         other2 = NULL;
476                                 }
477                         }
478                         count = 1 + (other != NULL) + (other2 != NULL && other != other2);
479
480                         if (count > 1) {
481                                 int i = 0;
482
483                                 unit->nodes = xmalloc(count * sizeof(*unit->nodes));
484                                 unit->costs = xmalloc(count * sizeof(*unit->costs));
485                                 unit->node_count = count;
486                                 unit->nodes[i] = irn;
487                                 if (other != NULL) {
488                                         ++i;
489                                         unit->nodes[i] = other;
490                                         unit->costs[i] = co->get_costs(co, irn, other, -1);
491                                 }
492                                 if (other2 != NULL) {
493                                         if (other == other2) {
494                                                 unit->costs[i] += co->get_costs(co, irn, other2, -1);
495                                         } else {
496                                                 ++i;
497                                                 unit->nodes[i] = other2;
498                                                 unit->costs[i] = co->get_costs(co, irn, other2, -1);
499                                         }
500                                 }
501                         }
502                 } else {
503                         assert(0 && "This is not an optimizable node!");
504                 }
505         }
506
507         /* Insert the new unit at a position according to its costs */
508         if (unit->node_count > 1) {
509                 int i;
510                 struct list_head *tmp;
511
512                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
513                 for(i=1; i<unit->node_count; ++i) {
514                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
515                         unit->all_nodes_costs += unit->costs[i];
516                 }
517
518                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
519                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
520                 /* Insert the new ou according to its sort_key */
521                 tmp = &co->units;
522                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
523                         tmp = tmp->next;
524                 list_add(&unit->units, tmp);
525         } else {
526                 free(unit);
527         }
528 }
529
530 #ifdef QUICK_AND_DIRTY_HACK
531
532 static int compare_ous(const void *k1, const void *k2) {
533         const unit_t *u1 = *((const unit_t **) k1);
534         const unit_t *u2 = *((const unit_t **) k2);
535         int i, o, u1_has_constr, u2_has_constr;
536         arch_register_req_t req;
537         const arch_env_t *aenv = u1->co->aenv;
538
539         /* Units with constraints come first */
540         u1_has_constr = 0;
541         for (i=0; i<u1->node_count; ++i) {
542                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
543                 if (arch_register_req_is(&req, limited)) {
544                         u1_has_constr = 1;
545                         break;
546                 }
547         }
548
549         u2_has_constr = 0;
550         for (i=0; i<u2->node_count; ++i) {
551                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
552                 if (arch_register_req_is(&req, limited)) {
553                         u2_has_constr = 1;
554                         break;
555                 }
556         }
557
558         if (u1_has_constr != u2_has_constr)
559                 return u2_has_constr - u1_has_constr;
560
561         /* Now check, whether the two units are connected */
562 #if 0
563         for (i=0; i<u1->node_count; ++i)
564                 for (o=0; o<u2->node_count; ++o)
565                         if (u1->nodes[i] == u2->nodes[o])
566                                 return 0;
567 #endif
568
569         /* After all, the sort key decides. Greater keys come first. */
570         return u2->sort_key - u1->sort_key;
571
572 }
573
574 /**
575  * Sort the ou's according to constraints and their sort_key
576  */
577 static void co_sort_units(copy_opt_t *co) {
578         int i, count = 0, costs;
579         unit_t *ou, **ous;
580
581         /* get the number of ous, remove them form the list and fill the array */
582         list_for_each_entry(unit_t, ou, &co->units, units)
583                 count++;
584         ous = alloca(count * sizeof(*ous));
585
586         costs = co_get_max_copy_costs(co);
587
588         i = 0;
589         list_for_each_entry(unit_t, ou, &co->units, units)
590                 ous[i++] = ou;
591
592         INIT_LIST_HEAD(&co->units);
593
594         assert(count == i && list_empty(&co->units));
595
596         for (i=0; i<count; ++i)
597                 ir_printf("%+F\n", ous[i]->nodes[0]);
598
599         qsort(ous, count, sizeof(*ous), compare_ous);
600
601         ir_printf("\n\n");
602         for (i=0; i<count; ++i)
603                 ir_printf("%+F\n", ous[i]->nodes[0]);
604
605         /* reinsert into list in correct order */
606         for (i=0; i<count; ++i)
607                 list_add_tail(&ous[i]->units, &co->units);
608
609         assert(costs == co_get_max_copy_costs(co));
610 }
611 #endif
612
613 void co_build_ou_structure(copy_opt_t *co) {
614         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
615         INIT_LIST_HEAD(&co->units);
616         irg_walk_graph(co->irg, co_collect_units, NULL, co);
617 #ifdef QUICK_AND_DIRTY_HACK
618         co_sort_units(co);
619 #endif
620 }
621
622 void co_free_ou_structure(copy_opt_t *co) {
623         unit_t *curr, *tmp;
624         ASSERT_OU_AVAIL(co);
625         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
626                 xfree(curr->nodes);
627                 xfree(curr->costs);
628                 xfree(curr);
629         }
630         co->units.next = NULL;
631 }
632
633 /* co_solve_heuristic() is implemented in becopyheur.c */
634
635 int co_get_max_copy_costs(const copy_opt_t *co) {
636         int i, res = 0;
637         unit_t *curr;
638
639         ASSERT_OU_AVAIL(co);
640
641         list_for_each_entry(unit_t, curr, &co->units, units) {
642                 res += curr->inevitable_costs;
643                 for (i=1; i<curr->node_count; ++i)
644                         res += curr->costs[i];
645         }
646         return res;
647 }
648
649 int co_get_inevit_copy_costs(const copy_opt_t *co) {
650         int res = 0;
651         unit_t *curr;
652
653         ASSERT_OU_AVAIL(co);
654
655         list_for_each_entry(unit_t, curr, &co->units, units)
656                 res += curr->inevitable_costs;
657         return res;
658 }
659
660 int co_get_copy_costs(const copy_opt_t *co) {
661         int i, res = 0;
662         unit_t *curr;
663
664         ASSERT_OU_AVAIL(co);
665
666         list_for_each_entry(unit_t, curr, &co->units, units) {
667                 int root_col = get_irn_col(co, curr->nodes[0]);
668                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
669                 res += curr->inevitable_costs;
670                 for (i=1; i<curr->node_count; ++i) {
671                         int arg_col = get_irn_col(co, curr->nodes[i]);
672                         if (root_col != arg_col) {
673                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
674                                 res += curr->costs[i];
675                         }
676                 }
677         }
678         return res;
679 }
680
681 int co_get_lower_bound(const copy_opt_t *co) {
682         int res = 0;
683         unit_t *curr;
684
685         ASSERT_OU_AVAIL(co);
686
687         list_for_each_entry(unit_t, curr, &co->units, units)
688                 res += curr->inevitable_costs + curr->min_nodes_costs;
689         return res;
690 }
691
692 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
693 {
694         bitset_t *seen = bitset_irg_malloc(co->irg);
695         affinity_node_t *an;
696
697         memset(stat, 0, sizeof(stat[0]));
698
699         /* count affinity edges. */
700         co_gs_foreach_aff_node(co, an) {
701                 neighb_t *neigh;
702                 stat->aff_nodes += 1;
703                 bitset_add_irn(seen, an->irn);
704                 co_gs_foreach_neighb(an, neigh) {
705                         if(!bitset_contains_irn(seen, neigh->irn)) {
706                                 stat->aff_edges += 1;
707                                 stat->max_costs += neigh->costs;
708
709                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
710                                         stat->costs += neigh->costs;
711                                         stat->unsatisfied_edges += 1;
712                                 }
713
714                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
715                                         stat->aff_int += 1;
716                                         stat->inevit_costs += neigh->costs;
717                                 }
718
719                         }
720                 }
721         }
722
723         bitset_free(seen);
724 }
725
726 /******************************************************************************
727    _____                 _        _____ _
728   / ____|               | |      / ____| |
729  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
730  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
731  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
732   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
733                   | |                                       __/ |
734                   |_|                                      |___/
735  ******************************************************************************/
736
737 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
738         const affinity_node_t *n1 = k1;
739         const affinity_node_t *n2 = k2;
740         (void) size;
741
742         return (n1->irn != n2->irn);
743 }
744
745 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
746         affinity_node_t new_node, *node;
747         neighb_t        *nbr;
748         int             allocnew = 1;
749
750         new_node.irn        = n1;
751         new_node.degree     = 0;
752         new_node.neighbours = NULL;
753         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
754
755         for (nbr = node->neighbours; nbr; nbr = nbr->next)
756                 if (nbr->irn == n2) {
757                         allocnew = 0;
758                         break;
759                 }
760
761         /* if we did not find n2 in n1's neighbourhood insert it */
762         if (allocnew) {
763                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
764                 nbr->irn   = n2;
765                 nbr->costs = 0;
766                 nbr->next  = node->neighbours;
767
768                 node->neighbours = nbr;
769                 node->degree++;
770         }
771
772         /* now nbr points to n1's neighbour-entry of n2 */
773         nbr->costs += costs;
774 }
775
776 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
777         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
778                 add_edge(co, n1, n2, costs);
779                 add_edge(co, n2, n1, costs);
780         }
781 }
782
783 static void build_graph_walker(ir_node *irn, void *env) {
784         copy_opt_t *co = env;
785         int pos, max;
786         const arch_register_t *reg;
787
788         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
789                 return;
790
791         reg = arch_get_irn_register(co->aenv, irn);
792         if (arch_register_type_is(reg, ignore))
793                 return;
794
795         if (is_Reg_Phi(irn)) { /* Phis */
796                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
797                         ir_node *arg = get_irn_n(irn, pos);
798                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
799                 }
800         }
801         else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
802                 ir_node *arg = get_Perm_src(irn);
803                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
804         }
805         else { /* 2-address code */
806                 const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
807                 if (is_2addr_code(req)) {
808                         const int *i;
809                         for (i = req->other_same; i != ENDOF(req->other_same); ++i) {
810                                 ir_node *other;
811
812                                 if (*i == -1) break;
813
814                                 other = get_irn_n(skip_Proj(irn), *i);
815                                 if (! arch_irn_is(co->aenv, other, ignore))
816                                         add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
817                         }
818                 }
819         }
820 }
821
822 void co_build_graph_structure(copy_opt_t *co) {
823         obstack_init(&co->obst);
824         co->nodes = new_set(compare_affinity_node_t, 32);
825
826         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
827 }
828
829 void co_free_graph_structure(copy_opt_t *co) {
830         ASSERT_GS_AVAIL(co);
831
832         del_set(co->nodes);
833         obstack_free(&co->obst, NULL);
834         co->nodes = NULL;
835 }
836
837 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
838
839 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
840         affinity_node_t new_node, *n;
841
842         ASSERT_GS_AVAIL(co);
843
844         new_node.irn = irn;
845         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
846         if (n) {
847                 return (n->degree > 0);
848         } else
849                 return 0;
850 }
851
852 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
853 {
854         be_ifg_t *ifg  = co->cenv->ifg;
855         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
856
857         ir_node *irn;
858         void *it, *nit;
859         int i, n, n_regs;
860
861         n_regs = 0;
862         for(i = 0; i < co->cls->n_regs; ++i) {
863                 const arch_register_t *reg = &co->cls->regs[i];
864                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
865         }
866
867         /*
868          * n contains the first node number.
869          * the values below n are the pre-colored register nodes
870          */
871
872         it  = be_ifg_nodes_iter_alloca(ifg);
873         nit = be_ifg_neighbours_iter_alloca(ifg);
874
875         n = n_regs;
876         be_ifg_foreach_node(ifg, it, irn) {
877                 if(!arch_irn_is(co->aenv, irn, ignore))
878                         set_irn_link(irn, INT_TO_PTR(n++));
879         }
880
881         fprintf(f, "%d %d\n", n, n_regs);
882
883         be_ifg_foreach_node(ifg, it, irn) {
884                 if(!arch_irn_is(co->aenv, irn, ignore)) {
885                         int idx            = PTR_TO_INT(get_irn_link(irn));
886                         affinity_node_t *a = get_affinity_info(co, irn);
887
888                         const arch_register_req_t *req;
889                         ir_node *adj;
890
891                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
892                         if(arch_register_req_is(req, limited)) {
893                                 for(i = 0; i < co->cls->n_regs; ++i) {
894                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
895                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
896                                 }
897                         }
898
899
900                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
901                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
902                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
903                                         if(idx < adj_idx)
904                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
905                                 }
906                         }
907
908                         if(a) {
909                                 neighb_t *n;
910
911                                 co_gs_foreach_neighb(a, n) {
912                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
913                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
914                                                 if(idx < n_idx)
915                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
916                                         }
917                                 }
918                         }
919                 }
920         }
921 }
922
923 typedef struct _appel_clique_walker_t {
924         ir_phase ph;
925         const copy_opt_t *co;
926         int curr_nr;
927         int node_count;
928         FILE *f;
929         int dumb;
930         int *color_map;
931         struct obstack obst;
932 } appel_clique_walker_t;
933
934 typedef struct _appel_block_info_t {
935         int *live_end_nr;
936         int *live_in_nr;
937         int *phi_nr;
938         ir_node **live_end;
939         ir_node **live_in;
940         ir_node **phi;
941         int n_live_end;
942         int n_live_in;
943         int n_phi;
944 } appel_block_info_t;
945
946 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
947 {
948 #if 0
949         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
950         int res = (int) freq;
951         return res == 0 ? 1 : res;
952 #else
953         ir_loop *loop = get_irn_loop(bl);
954         (void) env;
955         if(loop) {
956                 int d = get_loop_depth(loop);
957                 return 1 + d * d;
958         }
959         return 1;
960 #endif
961 }
962
963 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
964 {
965         appel_block_info_t *res = NULL;
966         (void) old;
967
968         if(is_Block(irn)) {
969                 appel_clique_walker_t *d = (void *) phase;
970                 res = phase_alloc(phase, sizeof(res[0]));
971                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
972                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
973                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
974                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
975                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
976                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
977         }
978
979         return res;
980 }
981
982 typedef struct _insn_list_t {
983         be_insn_t *insn;
984         struct list_head list;
985 } insn_list_t;
986
987 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
988 {
989         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
990         int i;
991
992         for(i = 0; i < bli->n_live_end; ++i)
993                 if(bli->live_end[i] == irn)
994                         return bli->live_end_nr[i];
995
996         return -1;
997 }
998
999 static int appel_dump_clique(appel_clique_walker_t *env, const ir_nodeset_t *live, ir_node *bl, int curr_nr, int start_nr)
1000 {
1001         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
1002         ir_node *irn;
1003         int n_live;
1004         int j;
1005         ir_nodeset_iterator_t iter;
1006
1007         n_live = 0;
1008         foreach_ir_nodeset(live, irn, iter)
1009                 live_arr[n_live++] = irn;
1010
1011         /* dump the live after clique */
1012         if(!env->dumb) {
1013                 for(j = 0; j < n_live; ++j) {
1014                         int k;
1015
1016                         for(k = j + 1; k < n_live; ++k) {
1017                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
1018                         }
1019                         fprintf(env->f, "\n");
1020                 }
1021         }
1022
1023         /* dump the affinities */
1024         for(j = 0; !env->dumb && j < n_live; ++j) {
1025                 ir_node *irn = live_arr[j];
1026                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1027
1028                 /* if the node was already live in the last insn dump the affinity */
1029                 if(old_nr > start_nr) {
1030                         int weight = appel_aff_weight(env, bl);
1031                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1032                 }
1033         }
1034
1035         /* set the current numbers into the link field. */
1036         for(j = 0; j < n_live; ++j) {
1037                 ir_node *irn = live_arr[j];
1038                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1039         }
1040
1041         return curr_nr + n_live;
1042 }
1043
1044 static void appel_walker(ir_node *bl, void *data)
1045 {
1046         appel_clique_walker_t *env = data;
1047         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1048         struct obstack *obst       = &env->obst;
1049         void *base                 = obstack_base(obst);
1050         ir_nodeset_t live;
1051         ir_nodeset_iterator_t iter;
1052         be_lv_t *lv                = env->co->cenv->birg->lv;
1053
1054         int n_insns  = 0;
1055         int n_nodes  = 0;
1056         int start_nr = env->curr_nr;
1057         int curr_nr  = start_nr;
1058
1059         be_insn_env_t insn_env;
1060         int i, j;
1061         ir_node *irn;
1062         be_insn_t **insns;
1063
1064         insn_env.aenv = env->co->aenv;
1065         insn_env.cls  = env->co->cls;
1066         insn_env.obst = obst;
1067         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1068
1069         /* Guess how many insns will be in this block. */
1070         sched_foreach(bl, irn)
1071                 n_nodes++;
1072
1073         bli->n_phi = 0;
1074         insns = xmalloc(n_nodes * sizeof(insns[0]));
1075
1076         /* Put all insns in an array. */
1077         irn = sched_first(bl);
1078         while(!sched_is_end(irn)) {
1079                 be_insn_t *insn;
1080                 insn = be_scan_insn(&insn_env, irn);
1081                 insns[n_insns++] = insn;
1082                 irn = insn->next_insn;
1083         }
1084
1085         DBG((dbg, LEVEL_2, "%+F\n", bl));
1086         ir_nodeset_init(&live);
1087         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, &live);
1088
1089         /* Generate the bad and ugly. */
1090         for(i = n_insns - 1; i >= 0; --i) {
1091                 be_insn_t *insn = insns[i];
1092
1093                 /* The first live set has to be saved in the block border set. */
1094                 if(i == n_insns - 1) {
1095                         j = 0;
1096                         foreach_ir_nodeset(&live, irn, iter) {
1097                                 bli->live_end[j]    = irn;
1098                                 bli->live_end_nr[j] = curr_nr + j;
1099                                 ++j;
1100                         }
1101                         bli->n_live_end = j;
1102                 }
1103
1104                 if(!env->dumb) {
1105                         for(j = 0; j < insn->use_start; ++j) {
1106                                 ir_node *op   = insn->ops[j].carrier;
1107                                 bitset_t *adm = insn->ops[j].regs;
1108                                 int k;
1109                                 size_t nr;
1110
1111                                 if(!insn->ops[j].has_constraints)
1112                                         continue;
1113
1114                                 nr = 0;
1115                                 foreach_ir_nodeset(&live, irn, iter) {
1116                                         if(irn == op) {
1117                                                 break;
1118                                         }
1119                                         ++nr;
1120                                 }
1121
1122                                 assert(nr < ir_nodeset_size(&live));
1123
1124                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1125                                         int mapped_col = env->color_map[k];
1126                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1127                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1128                                 }
1129                         }
1130                 }
1131
1132                 /* dump the clique and update the stuff. */
1133                 curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1134
1135                 /* remove all defs. */
1136                 for(j = 0; j < insn->use_start; ++j)
1137                         ir_nodeset_remove(&live, insn->ops[j].carrier);
1138
1139                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1140                         bli->phi[bli->n_phi]    = insn->irn;
1141                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1142                         bli->n_phi++;
1143                 }
1144
1145                 /* add all uses */
1146                 else
1147                         for(j = insn->use_start; j < insn->n_ops; ++j)
1148                                 ir_nodeset_insert(&live, insn->ops[j].carrier);
1149         }
1150
1151         /* print the start clique. */
1152         curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1153
1154         i = 0;
1155         foreach_ir_nodeset(&live, irn, iter) {
1156                 bli->live_in[i]    = irn;
1157                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1158                 ++i;
1159         }
1160         bli->n_live_in = i;
1161
1162         ir_nodeset_destroy(&live);
1163         free(insns);
1164         obstack_free(obst, base);
1165         env->curr_nr = curr_nr;
1166 }
1167
1168 static void appel_inter_block_aff(ir_node *bl, void *data)
1169 {
1170         appel_clique_walker_t *env = data;
1171         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1172
1173         int i, j, n;
1174
1175         for(i = 0; i < bli->n_live_in; ++i) {
1176                 ir_node *irn = bli->live_in[i];
1177
1178                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1179                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1180
1181                         int nr = appel_get_live_end_nr(env, pred, irn);
1182                         assert(nr >= 0);
1183                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1184                 }
1185         }
1186
1187         for(i = 0; i < bli->n_phi; ++i) {
1188                 ir_node *irn = bli->phi[i];
1189
1190                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1191                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1192                         ir_node *op = get_irn_n(irn, j);
1193
1194                         int nr = appel_get_live_end_nr(env, pred, op);
1195                         assert(nr >= 0);
1196                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1197                 }
1198         }
1199
1200 }
1201
1202 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1203 {
1204         int i;
1205         int n_colors;
1206         appel_clique_walker_t env;
1207         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1208         be_lv_t *lv = co->cenv->birg->lv;
1209
1210         be_liveness_recompute(lv);
1211         obstack_init(&env.obst);
1212         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1213         env.curr_nr = co->cls->n_regs;
1214         env.co = co;
1215         env.f = f;
1216
1217         bitset_copy(adm, co->cenv->ignore_colors);
1218         bitset_flip_all(adm);
1219
1220         /* Make color map. */
1221         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1222         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1223                 const arch_register_t *reg = &co->cls->regs[i];
1224                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1225         }
1226
1227         env.dumb = 1;
1228         env.curr_nr = n_colors;
1229         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1230         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1231
1232         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1233
1234         /* make the first k nodes interfere */
1235         for(i = 0; i < n_colors; ++i) {
1236                 int j;
1237                 for(j = i + 1; j < n_colors; ++j)
1238                         fprintf(f, "%d %d -1 ", i, j);
1239                 fprintf(f, "\n");
1240         }
1241
1242         env.dumb = 0;
1243         env.curr_nr = n_colors;
1244         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1245         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1246         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1247         obstack_free(&env.obst, NULL);
1248 }
1249
1250 /*
1251          ___ _____ ____   ____   ___ _____   ____                        _
1252         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1253          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1254          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1255         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1256                                                                   |_|            |___/
1257 */
1258
1259 static const char *get_dot_color_name(size_t col)
1260 {
1261         static const char *names[] = {
1262                 "blue",
1263                 "red",
1264                 "green",
1265                 "yellow",
1266                 "cyan",
1267                 "magenta",
1268                 "orange",
1269                 "chocolate",
1270                 "beige",
1271                 "navy",
1272                 "darkgreen",
1273                 "darkred",
1274                 "lightPink",
1275                 "chartreuse",
1276                 "lightskyblue",
1277                 "linen",
1278                 "pink",
1279                 "lightslateblue",
1280                 "mintcream",
1281                 "red",
1282                 "darkolivegreen",
1283                 "mediumblue",
1284                 "mistyrose",
1285                 "salmon",
1286                 "darkseagreen",
1287                 "mediumslateblue"
1288                 "moccasin",
1289                 "tomato",
1290                 "forestgreen",
1291                 "darkturquoise",
1292                 "palevioletred"
1293         };
1294
1295         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1296 }
1297
1298 typedef struct _co_ifg_dump_t {
1299         const copy_opt_t *co;
1300         unsigned flags;
1301 } co_ifg_dump_t;
1302
1303 static void ifg_dump_graph_attr(FILE *f, void *self)
1304 {
1305         (void) self;
1306         fprintf(f, "overlap=scale");
1307 }
1308
1309 static int ifg_is_dump_node(void *self, ir_node *irn)
1310 {
1311         co_ifg_dump_t *cod = self;
1312         return !arch_irn_is(cod->co->aenv, irn, ignore);
1313 }
1314
1315 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1316 {
1317         co_ifg_dump_t *env         = self;
1318         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1319         const arch_register_req_t *req;
1320         int limited;
1321
1322         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1323         limited = arch_register_req_is(req, limited);
1324
1325         if(env->flags & CO_IFG_DUMP_LABELS) {
1326                 ir_fprintf(f, "label=\"%+F", irn);
1327
1328 #if 0
1329                 // TODO fix this...
1330                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1331                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1332                         req.limited(req.limited_env, bs);
1333                         ir_fprintf(f, "\\n%B", bs);
1334                 }
1335 #endif
1336                 ir_fprintf(f, "\" ");
1337         } else {
1338                 fprintf(f, "label=\"\" shape=point " );
1339         }
1340
1341         if(env->flags & CO_IFG_DUMP_SHAPE)
1342                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1343
1344         if(env->flags & CO_IFG_DUMP_COLORS)
1345                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1346 }
1347
1348 static void ifg_dump_at_end(FILE *file, void *self)
1349 {
1350         co_ifg_dump_t *env = self;
1351         affinity_node_t *a;
1352
1353         co_gs_foreach_aff_node(env->co, a) {
1354                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1355                 unsigned aidx = get_irn_idx(a->irn);
1356                 neighb_t *n;
1357
1358                 co_gs_foreach_neighb(a, n) {
1359                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1360                         unsigned nidx = get_irn_idx(n->irn);
1361
1362                         if(aidx < nidx) {
1363                                 const char *color = nr == ar ? "blue" : "red";
1364                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1365                                 if(env->flags & CO_IFG_DUMP_LABELS)
1366                                         fprintf(file, "label=\"%d\" ", n->costs);
1367                                 if(env->flags & CO_IFG_DUMP_COLORS)
1368                                         fprintf(file, "color=%s ", color);
1369                                 else
1370                                         fprintf(file, "style=dotted");
1371                                 fprintf(file, "];\n");
1372                         }
1373                 }
1374         }
1375 }
1376
1377
1378 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1379         ifg_is_dump_node,
1380         ifg_dump_graph_attr,
1381         ifg_dump_node_attr,
1382         NULL,
1383         NULL,
1384         ifg_dump_at_end
1385 };
1386
1387
1388
1389 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1390 {
1391         co_ifg_dump_t cod;
1392
1393         cod.co    = co;
1394         cod.flags = flags;
1395         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1396 }
1397
1398
1399 void co_solve_park_moon(copy_opt_t *opt)
1400 {
1401         (void) opt;
1402 }
1403
1404 static int void_algo(copy_opt_t *co)
1405 {
1406         (void) co;
1407         return 0;
1408 }
1409
1410 /*
1411                 _    _                  _ _   _
1412            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1413           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1414          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1415         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1416                            |___/
1417 */
1418
1419 typedef struct {
1420         co_algo_t  *algo;
1421         const char *name;
1422         int        can_improve_existing;
1423 } co_algo_info_t;
1424
1425 static co_algo_info_t algos[] = {
1426         { void_algo,               "none",  0 },
1427         { co_solve_heuristic,      "heur1", 0 },
1428         { co_solve_heuristic_new,  "heur2", 0 },
1429         { co_solve_heuristic_java, "heur3", 0 },
1430         { co_solve_heuristic_mst,  "heur4", 0 },
1431 #ifdef WITH_ILP
1432         { co_solve_ilp2,           "ilp",   1 },
1433 #endif
1434         { NULL,                    "",      0 }
1435 };
1436
1437 /*
1438     __  __       _         ____       _
1439    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1440    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1441    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1442    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1443
1444 */
1445
1446 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1447 {
1448         FILE *result;
1449         char buf[1024];
1450
1451         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1452         result = fopen(buf, "wt");
1453         if(result == NULL) {
1454                 panic("Couldn't open '%s' for writing.", buf);
1455         }
1456
1457         return result;
1458 }
1459
1460 void co_driver(be_chordal_env_t *cenv)
1461 {
1462         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1463         co_complete_stats_t before, after;
1464         copy_opt_t          *co;
1465         co_algo_t           *algo_func;
1466         int                 was_optimal = 0;
1467
1468         if (algo >= CO_ALGO_LAST)
1469                 return;
1470
1471         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1472
1473         co = new_copy_opt(cenv, cost_func);
1474         co_build_ou_structure(co);
1475         co_build_graph_structure(co);
1476
1477         co_complete_stats(co, &before);
1478
1479         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1480         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1481         be_stat_ev_ull("co_max_costs",    before.max_costs);
1482         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1483         be_stat_ev_ull("co_aff_int",      before.aff_int);
1484
1485         be_stat_ev_ull("co_init_costs",   before.costs);
1486         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1487
1488         /* Dump the interference graph in Appel's format. */
1489         if (dump_flags & DUMP_APPEL) {
1490                 FILE *f = my_open(cenv, "", ".apl");
1491                 co_dump_appel_graph(co, f);
1492                 fclose(f);
1493         }
1494
1495         if (dump_flags & DUMP_BEFORE) {
1496                 FILE *f = my_open(cenv, "", "-before.dot");
1497                 co_dump_ifg_dot(co, f, style_flags);
1498                 fclose(f);
1499         }
1500
1501         /* if the algo can improve results, provide an initial solution with heur3 */
1502         if (improve && algos[algo].can_improve_existing) {
1503                 co_complete_stats_t stats;
1504
1505                 /* produce a heuristic solution */
1506 #ifdef WITH_JVM
1507                 co_solve_heuristic_java(co);
1508 #else
1509                 co_solve_heuristic(co);
1510 #endif /* WITH_JVM */
1511
1512                 /* do the stats and provide the current costs */
1513                 co_complete_stats(co, &stats);
1514                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1515         }
1516
1517 #ifdef WITH_JVM
1518         /* start the JVM here so that it does not tamper the timing. */
1519         if (algo == CO_ALGO_HEUR3)
1520                 be_java_coal_start_jvm();
1521 #endif /* WITH_JVM */
1522
1523         algo_func = algos[algo].algo;
1524
1525         /* perform actual copy minimization */
1526         lc_timer_reset_and_start(timer);
1527         was_optimal = algo_func(co);
1528         lc_timer_stop(timer);
1529
1530         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1531         be_stat_ev_ull("co_optimal", was_optimal);
1532
1533         if (dump_flags & DUMP_AFTER) {
1534                 FILE *f = my_open(cenv, "", "-after.dot");
1535                 co_dump_ifg_dot(co, f, style_flags);
1536                 fclose(f);
1537         }
1538
1539         co_complete_stats(co, &after);
1540
1541         if (do_stats) {
1542                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1543                 ulong64 evitable          = after.costs     - after.inevit_costs;
1544
1545                 ir_printf("%30F ", cenv->irg);
1546                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1547
1548                 if(optimizable_costs > 0)
1549                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1550                 else
1551                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1552         }
1553
1554         be_stat_ev_ull("co_after_costs", after.costs);
1555         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1556
1557         co_free_graph_structure(co);
1558         co_free_ou_structure(co);
1559         free_copy_opt(co);
1560 }