reduced stack overhead by transforming nodes as early as possible in functions
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110 #ifdef WITH_JVM
111         { "heur3",  CO_ALGO_HEUR3 },
112 #endif /* WITH_JVM */
113         { "heur4",  CO_ALGO_HEUR4 },
114 #ifdef WITH_ILP
115         { "ilp",    CO_ALGO_ILP   },
116 #endif /* WITH_ILP */
117         { NULL,     0 }
118 };
119
120 typedef int (*opt_funcptr)(void);
121
122 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
123         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
124         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
125         { "one",    (opt_funcptr) co_get_costs_all_one },
126         { NULL,     NULL }
127 };
128
129 static lc_opt_enum_mask_var_t dump_var = {
130         &dump_flags, dump_items
131 };
132
133 static lc_opt_enum_mask_var_t style_var = {
134         &style_flags, style_items
135 };
136
137 static lc_opt_enum_mask_var_t algo_var = {
138         &algo, algo_items
139 };
140
141 static lc_opt_enum_func_ptr_var_t cost_func_var = {
142         (opt_funcptr*) &cost_func, cost_func_items
143 };
144
145 static const lc_opt_table_entry_t options[] = {
146         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
147         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
148         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
149         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
150         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
151         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
152         { NULL }
153 };
154
155 /* Insert additional options registration functions here. */
156 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
157 extern void be_co2_register_options(lc_opt_entry_t *grp);
158 extern void be_co3_register_options(lc_opt_entry_t *grp);
159
160 void be_init_copycoal(void)
161 {
162         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
163         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
164         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
165         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
166
167         lc_opt_add_table(co_grp, options);
168 }
169
170 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
171
172 #undef QUICK_AND_DIRTY_HACK
173
174 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
175 {
176         if (env->ifg)
177                 return be_ifg_connected(env->ifg, a, b);
178         else
179                 return values_interfere(env->birg, a, b);
180 }
181
182
183 /******************************************************************************
184     _____                           _
185    / ____|                         | |
186   | |  __  ___ _ __   ___ _ __ __ _| |
187   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
188   | |__| |  __/ | | |  __/ | | (_| | |
189    \_____|\___|_| |_|\___|_|  \__,_|_|
190
191  ******************************************************************************/
192
193 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
194
195
196 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
197 {
198         const char *s1, *s2, *s3;
199         int len;
200         copy_opt_t *co;
201
202         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
203
204         co = xcalloc(1, sizeof(*co));
205         co->cenv      = chordal_env;
206         co->aenv      = chordal_env->birg->main_env->arch_env;
207         co->irg       = chordal_env->irg;
208         co->cls       = chordal_env->cls;
209         co->get_costs = get_costs;
210
211         s1 = get_irp_prog_name();
212         s2 = get_entity_name(get_irg_entity(co->irg));
213         s3 = chordal_env->cls->name;
214         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
215         co->name = xmalloc(len);
216         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
217
218         return co;
219 }
220
221 void free_copy_opt(copy_opt_t *co) {
222         xfree(co->name);
223         free(co);
224 }
225
226 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
227         const arch_register_req_t *req;
228         const arch_register_t *reg;
229
230         if (arch_irn_is(co->aenv, irn, ignore))
231                 return 0;
232
233         reg = arch_get_irn_register(co->aenv, irn);
234         if (arch_register_type_is(reg, ignore))
235                 return 0;
236
237         req = arch_get_register_req(co->aenv, irn, -1);
238         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_is_optimizable_arg(const copy_opt_t *co, ir_node *irn) {
245         const ir_edge_t *edge;
246         const arch_register_t *reg;
247
248         assert(0 && "Is buggy and obsolete. Do not use");
249
250         if (arch_irn_is(co->aenv, irn, ignore))
251                 return 0;
252
253         reg = arch_get_irn_register(co->aenv, irn);
254         if (arch_register_type_is(reg, ignore))
255                 return 0;
256
257         foreach_out_edge(irn, edge) {
258                 ir_node *n = edge->src;
259
260                 if (!nodes_interfere(co->cenv, irn, n) || irn == n) {
261                         const arch_register_req_t *req;
262                         req = arch_get_register_req(co->aenv, n, -1);
263
264                         if(is_Reg_Phi(n) ||
265                            is_Perm(co->aenv, n) ||
266                            (arch_register_req_is(req, should_be_same))) {
267                                 ir_node *other = get_irn_n(irn, req->other_same);
268                                 if(other == irn)
269                                         return 1;
270                         }
271                 }
272         }
273
274         return 0;
275 }
276
277 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
278         int cost = 0;
279         ir_loop *loop;
280         ir_node *root_block = get_nodes_block(root);
281
282         if (is_Phi(root)) {
283                 /* for phis the copies are placed in the corresponding pred-block */
284                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
285         } else {
286                 /* a perm places the copy in the same block as it resides */
287                 loop = get_irn_loop(root_block);
288         }
289         if (loop) {
290                 int d = get_loop_depth(loop);
291                 cost = d*d;
292         }
293         return 1+cost;
294 }
295
296 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
297         int res;
298         ir_node *root_bl = get_nodes_block(root);
299         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
300         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
301
302         /* don't allow values smaller than one. */
303         return res < 1 ? 1 : res;
304 }
305
306
307 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
308         return 1;
309 }
310
311 /******************************************************************************
312    ____        _   _    _       _ _          _____ _
313   / __ \      | | | |  | |     (_) |        / ____| |
314  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
315  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
316  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
317   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
318         | |                                                            __/ |
319         |_|                                                           |___/
320  ******************************************************************************/
321
322 /**
323  * Determines a maximum weighted independent set with respect to
324  * the interference and conflict edges of all nodes in a qnode.
325  */
326 static int ou_max_ind_set_costs(unit_t *ou) {
327         be_chordal_env_t *chordal_env = ou->co->cenv;
328         ir_node **safe, **unsafe;
329         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
330         bitset_t *curr;
331         int max, pos, curr_weight, best_weight = 0;
332
333         /* assign the nodes into two groups.
334          * safe: node has no interference, hence it is in every max stable set.
335          * unsafe: node has an interference
336          */
337         safe = alloca((ou->node_count-1) * sizeof(*safe));
338         safe_costs = 0;
339         safe_count = 0;
340         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
341         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
342         unsafe_count = 0;
343         for(i=1; i<ou->node_count; ++i) {
344                 int is_safe = 1;
345                 for(o=1; o<ou->node_count; ++o) {
346                         if (i==o)
347                                 continue;
348                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
349                                 unsafe_costs[unsafe_count] = ou->costs[i];
350                                 unsafe[unsafe_count] = ou->nodes[i];
351                                 ++unsafe_count;
352                                 is_safe = 0;
353                                 break;
354                         }
355                 }
356                 if (is_safe) {
357                         safe_costs += ou->costs[i];
358                         safe[safe_count++] = ou->nodes[i];
359                 }
360         }
361
362
363         /* now compute the best set out of the unsafe nodes*/
364         if (unsafe_count > MIS_HEUR_TRIGGER) {
365                 bitset_t *best = bitset_alloca(unsafe_count);
366                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
367                 for (i=0; i<unsafe_count; ++i) {
368                         bitset_set(best, i);
369                         /* check if it is a stable set */
370                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
371                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
372                                         bitset_clear(best, i); /* clear the bit and try next one */
373                                         break;
374                                 }
375                 }
376                 /* compute the weight */
377                 bitset_foreach(best, pos)
378                         best_weight += unsafe_costs[pos];
379         } else {
380                 /* Exact Algorithm: Brute force */
381                 curr = bitset_alloca(unsafe_count);
382                 bitset_set_all(curr);
383                 while ((max = bitset_popcnt(curr)) != 0) {
384                         /* check if curr is a stable set */
385                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
386                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
387                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
388                                                         goto no_stable_set;
389
390                         /* if we arrive here, we have a stable set */
391                         /* compute the weigth of the stable set*/
392                         curr_weight = 0;
393                         bitset_foreach(curr, pos)
394                                 curr_weight += unsafe_costs[pos];
395
396                         /* any better ? */
397                         if (curr_weight > best_weight) {
398                                 best_weight = curr_weight;
399                         }
400
401         no_stable_set:
402                         bitset_minus1(curr);
403                 }
404         }
405
406         return safe_costs+best_weight;
407 }
408
409 static void co_collect_units(ir_node *irn, void *env) {
410         copy_opt_t *co = env;
411         unit_t *unit;
412
413         if (!is_curr_reg_class(co, irn))
414                 return;
415         if (!co_is_optimizable_root(co, irn))
416                 return;
417
418         /* Init a new unit */
419         unit = xcalloc(1, sizeof(*unit));
420         unit->co = co;
421         unit->node_count = 1;
422         INIT_LIST_HEAD(&unit->queue);
423
424         /* Phi with some/all of its arguments */
425         if (is_Reg_Phi(irn)) {
426                 int i, arity;
427
428                 /* init */
429                 arity = get_irn_arity(irn);
430                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
431                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
432                 unit->nodes[0] = irn;
433
434                 /* fill */
435                 for (i=0; i<arity; ++i) {
436                         int o, arg_pos;
437                         ir_node *arg = get_irn_n(irn, i);
438
439                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
440                         if (arg == irn)
441                                 continue;
442                         if (nodes_interfere(co->cenv, irn, arg)) {
443                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
444                                 continue;
445                         }
446
447                         /* Else insert the argument of the phi to the members of this ou */
448                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
449
450                         if (! arch_irn_is(co->aenv, arg, ignore)) {
451                                 /* Check if arg has occurred at a prior position in the arg/list */
452                                 arg_pos = 0;
453                                 for (o=1; o<unit->node_count; ++o) {
454                                         if (unit->nodes[o] == arg) {
455                                                 arg_pos = o;
456                                                 break;
457                                         }
458                                 }
459
460                                 if (!arg_pos) { /* a new argument */
461                                         /* insert node, set costs */
462                                         unit->nodes[unit->node_count] = arg;
463                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
464                                         unit->node_count++;
465                                 } else { /* arg has occured before in same phi */
466                                         /* increase costs for existing arg */
467                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
468                                 }
469                         }
470                 }
471                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
472                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
473         } else
474
475         /* Proj of a perm with corresponding arg */
476         if (is_Perm_Proj(co->aenv, irn)) {
477                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
478                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
479                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
480                 unit->node_count = 2;
481                 unit->nodes[0] = irn;
482                 unit->nodes[1] = get_Perm_src(irn);
483                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
484         } else {
485                 const arch_register_req_t *req =
486                         arch_get_register_req(co->aenv, irn, -1);
487
488                 /* Src == Tgt of a 2-addr-code instruction */
489                 if (is_2addr_code(req)) {
490                         ir_node *other = get_irn_n(irn, req->other_same);
491                         if (!arch_irn_is(co->aenv, other, ignore) &&
492                                         !nodes_interfere(co->cenv, irn, other)) {
493                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
494                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
495                                 unit->node_count = 2;
496                                 unit->nodes[0] = irn;
497                                 unit->nodes[1] = other;
498                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
499                         }
500                 } else {
501                         assert(0 && "This is not an optimizable node!");
502                 }
503         }
504
505         /* Insert the new unit at a position according to its costs */
506         if (unit->node_count > 1) {
507                 int i;
508                 struct list_head *tmp;
509
510                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
511                 for(i=1; i<unit->node_count; ++i) {
512                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
513                         unit->all_nodes_costs += unit->costs[i];
514                 }
515
516                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
517                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
518                 /* Insert the new ou according to its sort_key */
519                 tmp = &co->units;
520                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
521                         tmp = tmp->next;
522                 list_add(&unit->units, tmp);
523         } else {
524                 free(unit);
525         }
526 }
527
528 #ifdef QUICK_AND_DIRTY_HACK
529
530 static int compare_ous(const void *k1, const void *k2) {
531         const unit_t *u1 = *((const unit_t **) k1);
532         const unit_t *u2 = *((const unit_t **) k2);
533         int i, o, u1_has_constr, u2_has_constr;
534         arch_register_req_t req;
535         const arch_env_t *aenv = u1->co->aenv;
536
537         /* Units with constraints come first */
538         u1_has_constr = 0;
539         for (i=0; i<u1->node_count; ++i) {
540                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
541                 if (arch_register_req_is(&req, limited)) {
542                         u1_has_constr = 1;
543                         break;
544                 }
545         }
546
547         u2_has_constr = 0;
548         for (i=0; i<u2->node_count; ++i) {
549                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
550                 if (arch_register_req_is(&req, limited)) {
551                         u2_has_constr = 1;
552                         break;
553                 }
554         }
555
556         if (u1_has_constr != u2_has_constr)
557                 return u2_has_constr - u1_has_constr;
558
559         /* Now check, whether the two units are connected */
560 #if 0
561         for (i=0; i<u1->node_count; ++i)
562                 for (o=0; o<u2->node_count; ++o)
563                         if (u1->nodes[i] == u2->nodes[o])
564                                 return 0;
565 #endif
566
567         /* After all, the sort key decides. Greater keys come first. */
568         return u2->sort_key - u1->sort_key;
569
570 }
571
572 /**
573  * Sort the ou's according to constraints and their sort_key
574  */
575 static void co_sort_units(copy_opt_t *co) {
576         int i, count = 0, costs;
577         unit_t *ou, **ous;
578
579         /* get the number of ous, remove them form the list and fill the array */
580         list_for_each_entry(unit_t, ou, &co->units, units)
581                 count++;
582         ous = alloca(count * sizeof(*ous));
583
584         costs = co_get_max_copy_costs(co);
585
586         i = 0;
587         list_for_each_entry(unit_t, ou, &co->units, units)
588                 ous[i++] = ou;
589
590         INIT_LIST_HEAD(&co->units);
591
592         assert(count == i && list_empty(&co->units));
593
594         for (i=0; i<count; ++i)
595                 ir_printf("%+F\n", ous[i]->nodes[0]);
596
597         qsort(ous, count, sizeof(*ous), compare_ous);
598
599         ir_printf("\n\n");
600         for (i=0; i<count; ++i)
601                 ir_printf("%+F\n", ous[i]->nodes[0]);
602
603         /* reinsert into list in correct order */
604         for (i=0; i<count; ++i)
605                 list_add_tail(&ous[i]->units, &co->units);
606
607         assert(costs == co_get_max_copy_costs(co));
608 }
609 #endif
610
611 void co_build_ou_structure(copy_opt_t *co) {
612         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
613         INIT_LIST_HEAD(&co->units);
614         irg_walk_graph(co->irg, co_collect_units, NULL, co);
615 #ifdef QUICK_AND_DIRTY_HACK
616         co_sort_units(co);
617 #endif
618 }
619
620 void co_free_ou_structure(copy_opt_t *co) {
621         unit_t *curr, *tmp;
622         ASSERT_OU_AVAIL(co);
623         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
624                 xfree(curr->nodes);
625                 xfree(curr->costs);
626                 xfree(curr);
627         }
628         co->units.next = NULL;
629 }
630
631 /* co_solve_heuristic() is implemented in becopyheur.c */
632
633 int co_get_max_copy_costs(const copy_opt_t *co) {
634         int i, res = 0;
635         unit_t *curr;
636
637         ASSERT_OU_AVAIL(co);
638
639         list_for_each_entry(unit_t, curr, &co->units, units) {
640                 res += curr->inevitable_costs;
641                 for (i=1; i<curr->node_count; ++i)
642                         res += curr->costs[i];
643         }
644         return res;
645 }
646
647 int co_get_inevit_copy_costs(const copy_opt_t *co) {
648         int res = 0;
649         unit_t *curr;
650
651         ASSERT_OU_AVAIL(co);
652
653         list_for_each_entry(unit_t, curr, &co->units, units)
654                 res += curr->inevitable_costs;
655         return res;
656 }
657
658 int co_get_copy_costs(const copy_opt_t *co) {
659         int i, res = 0;
660         unit_t *curr;
661
662         ASSERT_OU_AVAIL(co);
663
664         list_for_each_entry(unit_t, curr, &co->units, units) {
665                 int root_col = get_irn_col(co, curr->nodes[0]);
666                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
667                 res += curr->inevitable_costs;
668                 for (i=1; i<curr->node_count; ++i) {
669                         int arg_col = get_irn_col(co, curr->nodes[i]);
670                         if (root_col != arg_col) {
671                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
672                                 res += curr->costs[i];
673                         }
674                 }
675         }
676         return res;
677 }
678
679 int co_get_lower_bound(const copy_opt_t *co) {
680         int res = 0;
681         unit_t *curr;
682
683         ASSERT_OU_AVAIL(co);
684
685         list_for_each_entry(unit_t, curr, &co->units, units)
686                 res += curr->inevitable_costs + curr->min_nodes_costs;
687         return res;
688 }
689
690 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
691 {
692         bitset_t *seen = bitset_irg_malloc(co->irg);
693         affinity_node_t *an;
694
695         memset(stat, 0, sizeof(stat[0]));
696
697         /* count affinity edges. */
698         co_gs_foreach_aff_node(co, an) {
699                 neighb_t *neigh;
700                 stat->aff_nodes += 1;
701                 bitset_add_irn(seen, an->irn);
702                 co_gs_foreach_neighb(an, neigh) {
703                         if(!bitset_contains_irn(seen, neigh->irn)) {
704                                 stat->aff_edges += 1;
705                                 stat->max_costs += neigh->costs;
706
707                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
708                                         stat->costs += neigh->costs;
709                                         stat->unsatisfied_edges += 1;
710                                 }
711
712                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
713                                         stat->aff_int += 1;
714                                         stat->inevit_costs += neigh->costs;
715                                 }
716
717                         }
718                 }
719         }
720
721         bitset_free(seen);
722 }
723
724 /******************************************************************************
725    _____                 _        _____ _
726   / ____|               | |      / ____| |
727  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
728  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
729  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
730   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
731                   | |                                       __/ |
732                   |_|                                      |___/
733  ******************************************************************************/
734
735 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
736         const affinity_node_t *n1 = k1;
737         const affinity_node_t *n2 = k2;
738
739         return (n1->irn != n2->irn);
740 }
741
742 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
743         affinity_node_t new_node, *node;
744         neighb_t new_nbr, *nbr;
745         int allocnew;
746
747         new_node.irn        = n1;
748         new_node.degree     = 0;
749         new_node.neighbours = NULL;
750         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
751
752         allocnew = 1;
753         for (nbr = node->neighbours; nbr; nbr = nbr->next)
754                 if (nbr->irn == n2) {
755                         allocnew = 0;
756                         break;
757                 }
758
759         /* if we did not find n2 in n1's neighbourhood insert it */
760         if (allocnew) {
761                 obstack_grow(&co->obst, &new_nbr, sizeof(new_nbr));
762                 nbr = obstack_finish(&co->obst);
763                 nbr->irn   = n2;
764                 nbr->costs = 0;
765                 nbr->next  = node->neighbours;
766                 node->neighbours = nbr;
767                 node->degree++;
768         }
769
770         /* now nbr points to n1's neighbour-entry of n2 */
771         nbr->costs += costs;
772 }
773
774 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
775         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
776                 add_edge(co, n1, n2, costs);
777                 add_edge(co, n2, n1, costs);
778         }
779 }
780
781 static void build_graph_walker(ir_node *irn, void *env) {
782         copy_opt_t *co = env;
783         int pos, max;
784         const arch_register_t *reg;
785
786         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
787                 return;
788
789         reg = arch_get_irn_register(co->aenv, irn);
790         if (arch_register_type_is(reg, ignore))
791                 return;
792
793         /* Phis */
794         if (is_Reg_Phi(irn))
795                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
796                         ir_node *arg = get_irn_n(irn, pos);
797                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
798                 }
799
800         /* Perms */
801         else if (is_Perm_Proj(co->aenv, irn)) {
802                 ir_node *arg = get_Perm_src(irn);
803                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
804         }
805
806         /* 2-address code */
807         else {
808                 const arch_register_req_t *req =
809                         arch_get_register_req(co->aenv, irn, -1);
810                 if (is_2addr_code(req)) {
811                         ir_node *other = get_irn_n(irn, req->other_same);
812                         if(!arch_irn_is(co->aenv, other, ignore))
813                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
814                 }
815         }
816 }
817
818 void co_build_graph_structure(copy_opt_t *co) {
819         obstack_init(&co->obst);
820         co->nodes = new_set(compare_affinity_node_t, 32);
821
822         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
823 }
824
825 void co_free_graph_structure(copy_opt_t *co) {
826         ASSERT_GS_AVAIL(co);
827
828         del_set(co->nodes);
829         obstack_free(&co->obst, NULL);
830         co->nodes = NULL;
831 }
832
833 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
834
835 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
836         affinity_node_t new_node, *n;
837
838         ASSERT_GS_AVAIL(co);
839
840         new_node.irn = irn;
841         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
842         if (n) {
843                 return (n->degree > 0);
844         } else
845                 return 0;
846 }
847
848 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
849 {
850         be_ifg_t *ifg  = co->cenv->ifg;
851         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
852
853         ir_node *irn;
854         void *it, *nit;
855         int i, n, n_regs;
856
857         n_regs = 0;
858         for(i = 0; i < co->cls->n_regs; ++i) {
859                 const arch_register_t *reg = &co->cls->regs[i];
860                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
861         }
862
863         /*
864          * n contains the first node number.
865          * the values below n are the pre-colored register nodes
866          */
867
868         it  = be_ifg_nodes_iter_alloca(ifg);
869         nit = be_ifg_neighbours_iter_alloca(ifg);
870
871         n = n_regs;
872         be_ifg_foreach_node(ifg, it, irn) {
873                 if(!arch_irn_is(co->aenv, irn, ignore))
874                         set_irn_link(irn, INT_TO_PTR(n++));
875         }
876
877         fprintf(f, "%d %d\n", n, n_regs);
878
879         be_ifg_foreach_node(ifg, it, irn) {
880                 if(!arch_irn_is(co->aenv, irn, ignore)) {
881                         int idx            = PTR_TO_INT(get_irn_link(irn));
882                         affinity_node_t *a = get_affinity_info(co, irn);
883
884                         const arch_register_req_t *req;
885                         ir_node *adj;
886
887                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
888                         if(arch_register_req_is(req, limited)) {
889                                 for(i = 0; i < co->cls->n_regs; ++i) {
890                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
891                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
892                                 }
893                         }
894
895
896                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
897                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
898                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
899                                         if(idx < adj_idx)
900                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
901                                 }
902                         }
903
904                         if(a) {
905                                 neighb_t *n;
906
907                                 co_gs_foreach_neighb(a, n) {
908                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
909                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
910                                                 if(idx < n_idx)
911                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
912                                         }
913                                 }
914                         }
915                 }
916         }
917 }
918
919 typedef struct _appel_clique_walker_t {
920         ir_phase ph;
921         const copy_opt_t *co;
922         int curr_nr;
923         int node_count;
924         FILE *f;
925         int dumb;
926         int *color_map;
927         struct obstack obst;
928 } appel_clique_walker_t;
929
930 typedef struct _appel_block_info_t {
931         int *live_end_nr;
932         int *live_in_nr;
933         int *phi_nr;
934         ir_node **live_end;
935         ir_node **live_in;
936         ir_node **phi;
937         int n_live_end;
938         int n_live_in;
939         int n_phi;
940 } appel_block_info_t;
941
942 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
943 {
944 #if 0
945         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
946         int res = (int) freq;
947         return res == 0 ? 1 : res;
948 #else
949         ir_loop *loop = get_irn_loop(bl);
950         if(loop) {
951                 int d = get_loop_depth(loop);
952                 return 1 + d * d;
953         }
954         return 1;
955 #endif
956 }
957
958 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
959 {
960         appel_block_info_t *res = NULL;
961
962         if(is_Block(irn)) {
963                 appel_clique_walker_t *d = (void *) phase;
964                 res = phase_alloc(phase, sizeof(res[0]));
965                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
966                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
967                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
968                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
969                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
970                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
971         }
972
973         return res;
974 }
975
976 typedef struct _insn_list_t {
977         be_insn_t *insn;
978         struct list_head list;
979 } insn_list_t;
980
981 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
982 {
983         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
984         int i;
985
986         for(i = 0; i < bli->n_live_end; ++i)
987                 if(bli->live_end[i] == irn)
988                         return bli->live_end_nr[i];
989
990         return -1;
991 }
992
993 static int appel_dump_clique(appel_clique_walker_t *env, pset *live, ir_node *bl, int curr_nr, int start_nr)
994 {
995         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
996         ir_node *irn;
997         int n_live;
998         int j;
999
1000         n_live = 0;
1001         foreach_pset(live, irn)
1002                 live_arr[n_live++] = irn;
1003
1004         /* dump the live after clique */
1005         if(!env->dumb) {
1006                 for(j = 0; j < n_live; ++j) {
1007                         int k;
1008
1009                         for(k = j + 1; k < n_live; ++k) {
1010                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
1011                         }
1012                         fprintf(env->f, "\n");
1013                 }
1014         }
1015
1016         /* dump the affinities */
1017         for(j = 0; !env->dumb && j < n_live; ++j) {
1018                 ir_node *irn = live_arr[j];
1019                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1020
1021                 /* if the node was already live in the last insn dump the affinity */
1022                 if(old_nr > start_nr) {
1023                         int weight = appel_aff_weight(env, bl);
1024                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1025                 }
1026         }
1027
1028         /* set the current numbers into the link field. */
1029         for(j = 0; j < n_live; ++j) {
1030                 ir_node *irn = live_arr[j];
1031                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1032         }
1033
1034         return curr_nr + n_live;
1035 }
1036
1037 static void appel_walker(ir_node *bl, void *data)
1038 {
1039         appel_clique_walker_t *env = data;
1040         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1041         struct obstack *obst       = &env->obst;
1042         void *base                 = obstack_base(obst);
1043         pset *live                 = pset_new_ptr_default();
1044         be_lv_t *lv                = env->co->cenv->birg->lv;
1045
1046         int n_insns  = 0;
1047         int n_nodes  = 0;
1048         int start_nr = env->curr_nr;
1049         int curr_nr  = start_nr;
1050
1051         be_insn_env_t insn_env;
1052         int i, j;
1053         ir_node *irn;
1054         be_insn_t **insns;
1055
1056         insn_env.aenv = env->co->aenv;
1057         insn_env.cls  = env->co->cls;
1058         insn_env.obst = obst;
1059         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1060
1061         /* Guess how many insns will be in this block. */
1062         sched_foreach(bl, irn)
1063                 n_nodes++;
1064
1065         bli->n_phi = 0;
1066         insns = malloc(n_nodes * sizeof(insns[0]));
1067
1068         /* Put all insns in an array. */
1069         irn = sched_first(bl);
1070         while(!sched_is_end(irn)) {
1071                 be_insn_t *insn;
1072                 insn = be_scan_insn(&insn_env, irn);
1073                 insns[n_insns++] = insn;
1074                 irn = insn->next_insn;
1075         }
1076
1077         DBG((dbg, LEVEL_2, "%+F\n", bl));
1078         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, live);
1079
1080         /* Generate the bad and ugly. */
1081         for(i = n_insns - 1; i >= 0; --i) {
1082                 be_insn_t *insn = insns[i];
1083
1084                 /* The first live set has to be saved in the block border set. */
1085                 if(i == n_insns - 1) {
1086                         j = 0;
1087                         foreach_pset(live, irn) {
1088                                 bli->live_end[j]    = irn;
1089                                 bli->live_end_nr[j] = curr_nr + j;
1090                                 ++j;
1091                         }
1092                         bli->n_live_end = j;
1093                 }
1094
1095                 if(!env->dumb) {
1096                         for(j = 0; j < insn->use_start; ++j) {
1097                                 ir_node *op   = insn->ops[j].carrier;
1098                                 bitset_t *adm = insn->ops[j].regs;
1099                                 int k;
1100                                 int nr;
1101
1102                                 if(!insn->ops[j].has_constraints)
1103                                         continue;
1104
1105                                 nr = 0;
1106                                 foreach_pset(live, irn) {
1107                                         if(irn == op) {
1108                                                 pset_break(live);
1109                                                 break;
1110                                         }
1111                                         ++nr;
1112                                 }
1113
1114                                 assert(nr < pset_count(live));
1115
1116                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1117                                         int mapped_col = env->color_map[k];
1118                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1119                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1120                                 }
1121                         }
1122                 }
1123
1124                 /* dump the clique and update the stuff. */
1125                 curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1126
1127                 /* remove all defs. */
1128                 for(j = 0; j < insn->use_start; ++j)
1129                         pset_remove_ptr(live, insn->ops[j].carrier);
1130
1131                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1132                         bli->phi[bli->n_phi]    = insn->irn;
1133                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1134                         bli->n_phi++;
1135                 }
1136
1137                 /* add all uses */
1138                 else
1139                         for(j = insn->use_start; j < insn->n_ops; ++j)
1140                                 pset_insert_ptr(live, insn->ops[j].carrier);
1141         }
1142
1143         /* print the start clique. */
1144         curr_nr = appel_dump_clique(env, live, bl, curr_nr, start_nr);
1145
1146         i = 0;
1147         foreach_pset(live, irn) {
1148                 bli->live_in[i]    = irn;
1149                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1150                 ++i;
1151         }
1152         bli->n_live_in = i;
1153
1154         del_pset(live);
1155         free(insns);
1156         obstack_free(obst, base);
1157         env->curr_nr = curr_nr;
1158 }
1159
1160 static void appel_inter_block_aff(ir_node *bl, void *data)
1161 {
1162         appel_clique_walker_t *env = data;
1163         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1164
1165         int i, j, n;
1166
1167         for(i = 0; i < bli->n_live_in; ++i) {
1168                 ir_node *irn = bli->live_in[i];
1169
1170                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1171                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1172
1173                         int nr = appel_get_live_end_nr(env, pred, irn);
1174                         assert(nr >= 0);
1175                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1176                 }
1177         }
1178
1179         for(i = 0; i < bli->n_phi; ++i) {
1180                 ir_node *irn = bli->phi[i];
1181
1182                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1183                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1184                         ir_node *op = get_irn_n(irn, j);
1185
1186                         int nr = appel_get_live_end_nr(env, pred, op);
1187                         assert(nr >= 0);
1188                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1189                 }
1190         }
1191
1192 }
1193
1194 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1195 {
1196         int i;
1197         int n_colors;
1198         appel_clique_walker_t env;
1199         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1200         be_lv_t *lv = co->cenv->birg->lv;
1201
1202         be_liveness_recompute(lv);
1203         obstack_init(&env.obst);
1204         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1205         env.curr_nr = co->cls->n_regs;
1206         env.co = co;
1207         env.f = f;
1208
1209         bitset_copy(adm, co->cenv->ignore_colors);
1210         bitset_flip_all(adm);
1211
1212         /* Make color map. */
1213         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1214         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1215                 const arch_register_t *reg = &co->cls->regs[i];
1216                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1217         }
1218
1219         env.dumb = 1;
1220         env.curr_nr = n_colors;
1221         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1222         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1223
1224         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1225
1226         /* make the first k nodes interfere */
1227         for(i = 0; i < n_colors; ++i) {
1228                 int j;
1229                 for(j = i + 1; j < n_colors; ++j)
1230                         fprintf(f, "%d %d -1 ", i, j);
1231                 fprintf(f, "\n");
1232         }
1233
1234         env.dumb = 0;
1235         env.curr_nr = n_colors;
1236         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1237         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1238         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1239         obstack_free(&env.obst, NULL);
1240 }
1241
1242 /*
1243          ___ _____ ____   ____   ___ _____   ____                        _
1244         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1245          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1246          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1247         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1248                                                                   |_|            |___/
1249 */
1250
1251 static const char *get_dot_color_name(int col)
1252 {
1253         static const char *names[] = {
1254                 "blue",
1255                 "red",
1256                 "green",
1257                 "yellow",
1258                 "cyan",
1259                 "magenta",
1260                 "orange",
1261                 "chocolate",
1262                 "beige",
1263                 "navy",
1264                 "darkgreen",
1265                 "darkred",
1266                 "lightPink",
1267                 "chartreuse",
1268                 "lightskyblue",
1269                 "linen",
1270                 "pink",
1271                 "lightslateblue",
1272                 "mintcream",
1273                 "red",
1274                 "darkolivegreen",
1275                 "mediumblue",
1276                 "mistyrose",
1277                 "salmon",
1278                 "darkseagreen",
1279                 "mediumslateblue"
1280                 "moccasin",
1281                 "tomato",
1282                 "forestgreen",
1283                 "darkturquoise",
1284                 "palevioletred"
1285         };
1286
1287         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1288 }
1289
1290 typedef struct _co_ifg_dump_t {
1291         const copy_opt_t *co;
1292         unsigned flags;
1293 } co_ifg_dump_t;
1294
1295 static void ifg_dump_graph_attr(FILE *f, void *self)
1296 {
1297         fprintf(f, "overlap=scale");
1298 }
1299
1300 static int ifg_is_dump_node(void *self, ir_node *irn)
1301 {
1302         co_ifg_dump_t *cod = self;
1303         return !arch_irn_is(cod->co->aenv, irn, ignore);
1304 }
1305
1306 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1307 {
1308         co_ifg_dump_t *env         = self;
1309         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1310         const arch_register_req_t *req;
1311         int limited;
1312
1313         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1314         limited = arch_register_req_is(req, limited);
1315
1316         if(env->flags & CO_IFG_DUMP_LABELS) {
1317                 ir_fprintf(f, "label=\"%+F", irn);
1318
1319 #if 0
1320                 // TODO fix this...
1321                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1322                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1323                         req.limited(req.limited_env, bs);
1324                         ir_fprintf(f, "\\n%B", bs);
1325                 }
1326 #endif
1327                 ir_fprintf(f, "\" ");
1328         } else {
1329                 fprintf(f, "label=\"\" shape=point " );
1330         }
1331
1332         if(env->flags & CO_IFG_DUMP_SHAPE)
1333                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1334
1335         if(env->flags & CO_IFG_DUMP_COLORS)
1336                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1337 }
1338
1339 static void ifg_dump_at_end(FILE *file, void *self)
1340 {
1341         co_ifg_dump_t *env = self;
1342         affinity_node_t *a;
1343
1344         co_gs_foreach_aff_node(env->co, a) {
1345                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1346                 unsigned aidx = get_irn_idx(a->irn);
1347                 neighb_t *n;
1348
1349                 co_gs_foreach_neighb(a, n) {
1350                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1351                         unsigned nidx = get_irn_idx(n->irn);
1352
1353                         if(aidx < nidx) {
1354                                 const char *color = nr == ar ? "blue" : "red";
1355                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1356                                 if(env->flags & CO_IFG_DUMP_LABELS)
1357                                         fprintf(file, "label=\"%d\" ", n->costs);
1358                                 if(env->flags & CO_IFG_DUMP_COLORS)
1359                                         fprintf(file, "color=%s ", color);
1360                                 else
1361                                         fprintf(file, "style=dotted");
1362                                 fprintf(file, "];\n");
1363                         }
1364                 }
1365         }
1366 }
1367
1368
1369 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1370         ifg_is_dump_node,
1371         ifg_dump_graph_attr,
1372         ifg_dump_node_attr,
1373         NULL,
1374         NULL,
1375         ifg_dump_at_end
1376 };
1377
1378
1379
1380 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1381 {
1382         co_ifg_dump_t cod;
1383
1384         cod.co    = co;
1385         cod.flags = flags;
1386         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1387 }
1388
1389
1390 void co_solve_park_moon(copy_opt_t *opt)
1391 {
1392
1393 }
1394
1395 static int void_algo(copy_opt_t *co)
1396 {
1397         return 0;
1398 }
1399
1400 /*
1401                 _    _                  _ _   _
1402            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1403           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1404          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1405         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1406                            |___/
1407 */
1408
1409 typedef struct {
1410         co_algo_t  *algo;
1411         const char *name;
1412         int        can_improve_existing;
1413 } co_algo_info_t;
1414
1415 static co_algo_info_t algos[] = {
1416         { void_algo,               "none",  0 },
1417         { co_solve_heuristic,      "heur1", 0 },
1418         { co_solve_heuristic_new,  "heur2", 0 },
1419         { co_solve_heuristic_java, "heur3", 0 },
1420         { co_solve_heuristic_mst,  "heur4", 0 },
1421 #ifdef WITH_ILP
1422         { co_solve_ilp2,           "ilp",   1 },
1423 #endif
1424         { NULL,                    "",      0 }
1425 };
1426
1427 /*
1428     __  __       _         ____       _
1429    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1430    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1431    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1432    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1433
1434 */
1435
1436 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1437 {
1438         FILE *result;
1439         char buf[1024];
1440
1441         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1442         result = fopen(buf, "wt");
1443         if(result == NULL) {
1444                 panic("Couldn't open '%s' for writing.", buf);
1445         }
1446
1447         return result;
1448 }
1449
1450 void co_driver(be_chordal_env_t *cenv)
1451 {
1452         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1453         co_complete_stats_t before, after;
1454         copy_opt_t          *co;
1455         co_algo_t           *algo_func;
1456         int                 was_optimal = 0;
1457
1458         if (algo < 0 || algo >= CO_ALGO_LAST)
1459                 return;
1460
1461         co = new_copy_opt(cenv, cost_func);
1462         co_build_ou_structure(co);
1463         co_build_graph_structure(co);
1464
1465         co_complete_stats(co, &before);
1466
1467         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1468         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1469         be_stat_ev_ull("co_max_costs",    before.max_costs);
1470         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1471         be_stat_ev_ull("co_aff_int",      before.aff_int);
1472
1473         be_stat_ev_ull("co_init_costs",   before.costs);
1474         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1475
1476         /* Dump the interference graph in Appel's format. */
1477         if (dump_flags & DUMP_APPEL) {
1478                 FILE *f = my_open(cenv, "", ".apl");
1479                 co_dump_appel_graph(co, f);
1480                 fclose(f);
1481         }
1482
1483         if (dump_flags & DUMP_BEFORE) {
1484                 FILE *f = my_open(cenv, "", "-before.dot");
1485                 co_dump_ifg_dot(co, f, style_flags);
1486                 fclose(f);
1487         }
1488
1489         /* if the algo can improve results, provide an initial solution with heur3 */
1490         if (improve && algos[algo].can_improve_existing) {
1491                 co_complete_stats_t stats;
1492
1493                 /* produce a heuristic solution */
1494 #ifdef WITH_JVM
1495                 co_solve_heuristic_java(co);
1496 #else
1497                 co_solve_heuristic(co);
1498 #endif /* WITH_JVM */
1499
1500                 /* do the stats and provide the current costs */
1501                 co_complete_stats(co, &stats);
1502                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1503         }
1504
1505 #ifdef WITH_JVM
1506         /* start the JVM here so that it does not tamper the timing. */
1507         if (algo == CO_ALGO_HEUR3)
1508                 be_java_coal_start_jvm();
1509 #endif /* WITH_JVM */
1510
1511         algo_func = algos[algo].algo;
1512
1513         /* perform actual copy minimization */
1514         lc_timer_reset_and_start(timer);
1515         was_optimal = algo_func(co);
1516         lc_timer_stop(timer);
1517
1518         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1519         be_stat_ev_ull("co_optimal", was_optimal);
1520
1521         if (dump_flags & DUMP_AFTER) {
1522                 FILE *f = my_open(cenv, "", "-after.dot");
1523                 co_dump_ifg_dot(co, f, style_flags);
1524                 fclose(f);
1525         }
1526
1527         co_complete_stats(co, &after);
1528
1529         if (do_stats) {
1530                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1531                 ulong64 evitable          = after.costs     - after.inevit_costs;
1532
1533                 ir_printf("%30F ", cenv->irg);
1534                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1535
1536                 if(optimizable_costs > 0)
1537                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1538                 else
1539                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1540         }
1541
1542         be_stat_ev_ull("co_after_costs", after.costs);
1543         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1544
1545         co_free_graph_structure(co);
1546         co_free_ou_structure(co);
1547         free_copy_opt(co);
1548 }