26d06861c1ae5d1097ff0db5aadeb44f05484f77
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2007 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "benodesets.h"
64 #include "bejavacoal.h"
65 #include "bestatevent.h"
66 #include "beirg_t.h"
67 #include "error.h"
68
69 #include <libcore/lc_timing.h>
70 #include <libcore/lc_opts.h>
71 #include <libcore/lc_opts_enum.h>
72
73 #define DUMP_BEFORE 1
74 #define DUMP_AFTER  2
75 #define DUMP_APPEL  4
76 #define DUMP_ALL    2 * DUMP_APPEL - 1
77
78 #define COST_FUNC_FREQ     1
79 #define COST_FUNC_LOOP     2
80 #define COST_FUNC_ALL_ONE  3
81
82 static unsigned   dump_flags  = 0;
83 static unsigned   style_flags = 0;
84 static unsigned   do_stats    = 0;
85 static cost_fct_t cost_func   = co_get_costs_exec_freq;
86 static unsigned   algo        = CO_ALGO_HEUR4;
87 static int        improve     = 1;
88
89 static const lc_opt_enum_mask_items_t dump_items[] = {
90         { "before",  DUMP_BEFORE },
91         { "after",   DUMP_AFTER  },
92         { "appel",   DUMP_APPEL  },
93         { "all",     DUMP_ALL    },
94         { NULL,      0 }
95 };
96
97 static const lc_opt_enum_mask_items_t style_items[] = {
98         { "color",   CO_IFG_DUMP_COLORS },
99         { "labels",  CO_IFG_DUMP_LABELS },
100         { "constr",  CO_IFG_DUMP_CONSTR },
101         { "shape",   CO_IFG_DUMP_SHAPE  },
102         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
103         { NULL,      0 }
104 };
105
106 static const lc_opt_enum_mask_items_t algo_items[] = {
107         { "none",   CO_ALGO_NONE  },
108         { "heur",   CO_ALGO_HEUR  },
109         { "heur2",  CO_ALGO_HEUR2 },
110 #ifdef WITH_JVM
111         { "heur3",  CO_ALGO_HEUR3 },
112 #endif /* WITH_JVM */
113         { "heur4",  CO_ALGO_HEUR4 },
114 #ifdef WITH_ILP
115         { "ilp",    CO_ALGO_ILP   },
116 #endif /* WITH_ILP */
117         { NULL,     0 }
118 };
119
120 typedef int (*opt_funcptr)(void);
121
122 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
123         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
124         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
125         { "one",    (opt_funcptr) co_get_costs_all_one },
126         { NULL,     NULL }
127 };
128
129 static lc_opt_enum_mask_var_t dump_var = {
130         &dump_flags, dump_items
131 };
132
133 static lc_opt_enum_mask_var_t style_var = {
134         &style_flags, style_items
135 };
136
137 static lc_opt_enum_mask_var_t algo_var = {
138         &algo, algo_items
139 };
140
141 static lc_opt_enum_func_ptr_var_t cost_func_var = {
142         (opt_funcptr*) &cost_func, cost_func_items
143 };
144
145 static const lc_opt_table_entry_t options[] = {
146         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
147         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
148         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
149         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
150         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
151         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
152         LC_OPT_LAST
153 };
154
155 /* Insert additional options registration functions here. */
156 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
157 extern void be_co2_register_options(lc_opt_entry_t *grp);
158 extern void be_co3_register_options(lc_opt_entry_t *grp);
159
160 void be_init_copycoal(void)
161 {
162         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
163         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
164         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
165         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
166
167         lc_opt_add_table(co_grp, options);
168 }
169
170 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
171
172 #undef QUICK_AND_DIRTY_HACK
173
174 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
175 {
176         if (env->ifg)
177                 return be_ifg_connected(env->ifg, a, b);
178         else
179                 return values_interfere(env->birg, a, b);
180 }
181
182
183 /******************************************************************************
184     _____                           _
185    / ____|                         | |
186   | |  __  ___ _ __   ___ _ __ __ _| |
187   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
188   | |__| |  __/ | | |  __/ | | (_| | |
189    \_____|\___|_| |_|\___|_|  \__,_|_|
190
191  ******************************************************************************/
192
193 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
194
195
196 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
197 {
198         const char *s1, *s2, *s3;
199         int len;
200         copy_opt_t *co;
201
202         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
203
204         co = xcalloc(1, sizeof(*co));
205         co->cenv      = chordal_env;
206         co->aenv      = chordal_env->birg->main_env->arch_env;
207         co->irg       = chordal_env->irg;
208         co->cls       = chordal_env->cls;
209         co->get_costs = get_costs;
210
211         s1 = get_irp_prog_name();
212         s2 = get_entity_name(get_irg_entity(co->irg));
213         s3 = chordal_env->cls->name;
214         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
215         co->name = xmalloc(len);
216         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
217
218         return co;
219 }
220
221 void free_copy_opt(copy_opt_t *co) {
222         xfree(co->name);
223         free(co);
224 }
225
226 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
227         const arch_register_req_t *req;
228         const arch_register_t *reg;
229
230         if (arch_irn_is(co->aenv, irn, ignore))
231                 return 0;
232
233         reg = arch_get_irn_register(co->aenv, irn);
234         if (arch_register_type_is(reg, ignore))
235                 return 0;
236
237         req = arch_get_register_req(co->aenv, irn, -1);
238         if (is_Reg_Phi(irn) || is_Perm_Proj(co->aenv, irn) || is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
245         int cost = 0;
246         ir_loop *loop;
247         ir_node *root_block = get_nodes_block(root);
248         (void) co;
249         (void) arg;
250
251         if (is_Phi(root)) {
252                 /* for phis the copies are placed in the corresponding pred-block */
253                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
254         } else {
255                 /* a perm places the copy in the same block as it resides */
256                 loop = get_irn_loop(root_block);
257         }
258         if (loop) {
259                 int d = get_loop_depth(loop);
260                 cost = d*d;
261         }
262         return 1+cost;
263 }
264
265 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
266         int res;
267         ir_node *root_bl = get_nodes_block(root);
268         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
269         (void) arg;
270         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
271
272         /* don't allow values smaller than one. */
273         return res < 1 ? 1 : res;
274 }
275
276
277 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
278         (void) co;
279         (void) root;
280         (void) arg;
281         (void) pos;
282         return 1;
283 }
284
285 /******************************************************************************
286    ____        _   _    _       _ _          _____ _
287   / __ \      | | | |  | |     (_) |        / ____| |
288  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
289  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
290  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
291   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
292         | |                                                            __/ |
293         |_|                                                           |___/
294  ******************************************************************************/
295
296 /**
297  * Determines a maximum weighted independent set with respect to
298  * the interference and conflict edges of all nodes in a qnode.
299  */
300 static int ou_max_ind_set_costs(unit_t *ou) {
301         be_chordal_env_t *chordal_env = ou->co->cenv;
302         ir_node **safe, **unsafe;
303         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
304         bitset_t *curr;
305         bitset_pos_t pos;
306         int max, curr_weight, best_weight = 0;
307
308         /* assign the nodes into two groups.
309          * safe: node has no interference, hence it is in every max stable set.
310          * unsafe: node has an interference
311          */
312         safe = alloca((ou->node_count-1) * sizeof(*safe));
313         safe_costs = 0;
314         safe_count = 0;
315         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
316         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
317         unsafe_count = 0;
318         for(i=1; i<ou->node_count; ++i) {
319                 int is_safe = 1;
320                 for(o=1; o<ou->node_count; ++o) {
321                         if (i==o)
322                                 continue;
323                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
324                                 unsafe_costs[unsafe_count] = ou->costs[i];
325                                 unsafe[unsafe_count] = ou->nodes[i];
326                                 ++unsafe_count;
327                                 is_safe = 0;
328                                 break;
329                         }
330                 }
331                 if (is_safe) {
332                         safe_costs += ou->costs[i];
333                         safe[safe_count++] = ou->nodes[i];
334                 }
335         }
336
337
338         /* now compute the best set out of the unsafe nodes*/
339         if (unsafe_count > MIS_HEUR_TRIGGER) {
340                 bitset_t *best = bitset_alloca(unsafe_count);
341                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
342                 for (i=0; i<unsafe_count; ++i) {
343                         bitset_set(best, i);
344                         /* check if it is a stable set */
345                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
346                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
347                                         bitset_clear(best, i); /* clear the bit and try next one */
348                                         break;
349                                 }
350                 }
351                 /* compute the weight */
352                 bitset_foreach(best, pos)
353                         best_weight += unsafe_costs[pos];
354         } else {
355                 /* Exact Algorithm: Brute force */
356                 curr = bitset_alloca(unsafe_count);
357                 bitset_set_all(curr);
358                 while ((max = bitset_popcnt(curr)) != 0) {
359                         /* check if curr is a stable set */
360                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
361                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
362                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
363                                                         goto no_stable_set;
364
365                         /* if we arrive here, we have a stable set */
366                         /* compute the weigth of the stable set*/
367                         curr_weight = 0;
368                         bitset_foreach(curr, pos)
369                                 curr_weight += unsafe_costs[pos];
370
371                         /* any better ? */
372                         if (curr_weight > best_weight) {
373                                 best_weight = curr_weight;
374                         }
375
376         no_stable_set:
377                         bitset_minus1(curr);
378                 }
379         }
380
381         return safe_costs+best_weight;
382 }
383
384 static void co_collect_units(ir_node *irn, void *env) {
385         copy_opt_t *co = env;
386         unit_t *unit;
387
388         if (!is_curr_reg_class(co, irn))
389                 return;
390         if (!co_is_optimizable_root(co, irn))
391                 return;
392
393         /* Init a new unit */
394         unit = xcalloc(1, sizeof(*unit));
395         unit->co = co;
396         unit->node_count = 1;
397         INIT_LIST_HEAD(&unit->queue);
398
399         /* Phi with some/all of its arguments */
400         if (is_Reg_Phi(irn)) {
401                 int i, arity;
402
403                 /* init */
404                 arity = get_irn_arity(irn);
405                 unit->nodes = xmalloc((arity+1) * sizeof(*unit->nodes));
406                 unit->costs = xmalloc((arity+1) * sizeof(*unit->costs));
407                 unit->nodes[0] = irn;
408
409                 /* fill */
410                 for (i=0; i<arity; ++i) {
411                         int o, arg_pos;
412                         ir_node *arg = get_irn_n(irn, i);
413
414                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
415                         if (arg == irn)
416                                 continue;
417                         if (nodes_interfere(co->cenv, irn, arg)) {
418                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
419                                 continue;
420                         }
421
422                         /* Else insert the argument of the phi to the members of this ou */
423                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
424
425                         if (! arch_irn_is(co->aenv, arg, ignore)) {
426                                 /* Check if arg has occurred at a prior position in the arg/list */
427                                 arg_pos = 0;
428                                 for (o=1; o<unit->node_count; ++o) {
429                                         if (unit->nodes[o] == arg) {
430                                                 arg_pos = o;
431                                                 break;
432                                         }
433                                 }
434
435                                 if (!arg_pos) { /* a new argument */
436                                         /* insert node, set costs */
437                                         unit->nodes[unit->node_count] = arg;
438                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
439                                         unit->node_count++;
440                                 } else { /* arg has occurred before in same phi */
441                                         /* increase costs for existing arg */
442                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
443                                 }
444                         }
445                 }
446                 unit->nodes = xrealloc(unit->nodes, unit->node_count * sizeof(*unit->nodes));
447                 unit->costs = xrealloc(unit->costs, unit->node_count * sizeof(*unit->costs));
448         } else
449
450         /* Proj of a perm with corresponding arg */
451         if (is_Perm_Proj(co->aenv, irn)) {
452                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
453                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
454                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
455                 unit->node_count = 2;
456                 unit->nodes[0] = irn;
457                 unit->nodes[1] = get_Perm_src(irn);
458                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
459         } else {
460                 const arch_register_req_t *req =
461                         arch_get_register_req(co->aenv, irn, -1);
462
463                 /* Src == Tgt of a 2-addr-code instruction */
464                 if (is_2addr_code(req)) {
465                         ir_node *other = get_irn_n(skip_Proj(irn), req->other_same[0]); /* TODO handle second should-be-same constraint */
466                         if (!arch_irn_is(co->aenv, other, ignore) &&
467                                         !nodes_interfere(co->cenv, irn, other)) {
468                                 unit->nodes = xmalloc(2 * sizeof(*unit->nodes));
469                                 unit->costs = xmalloc(2 * sizeof(*unit->costs));
470                                 unit->node_count = 2;
471                                 unit->nodes[0] = irn;
472                                 unit->nodes[1] = other;
473                                 unit->costs[1] = co->get_costs(co, irn, other, -1);
474                         }
475                 } else {
476                         assert(0 && "This is not an optimizable node!");
477                 }
478         }
479
480         /* Insert the new unit at a position according to its costs */
481         if (unit->node_count > 1) {
482                 int i;
483                 struct list_head *tmp;
484
485                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
486                 for(i=1; i<unit->node_count; ++i) {
487                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
488                         unit->all_nodes_costs += unit->costs[i];
489                 }
490
491                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
492                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
493                 /* Insert the new ou according to its sort_key */
494                 tmp = &co->units;
495                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
496                         tmp = tmp->next;
497                 list_add(&unit->units, tmp);
498         } else {
499                 free(unit);
500         }
501 }
502
503 #ifdef QUICK_AND_DIRTY_HACK
504
505 static int compare_ous(const void *k1, const void *k2) {
506         const unit_t *u1 = *((const unit_t **) k1);
507         const unit_t *u2 = *((const unit_t **) k2);
508         int i, o, u1_has_constr, u2_has_constr;
509         arch_register_req_t req;
510         const arch_env_t *aenv = u1->co->aenv;
511
512         /* Units with constraints come first */
513         u1_has_constr = 0;
514         for (i=0; i<u1->node_count; ++i) {
515                 arch_get_register_req(aenv, &req, u1->nodes[i], -1);
516                 if (arch_register_req_is(&req, limited)) {
517                         u1_has_constr = 1;
518                         break;
519                 }
520         }
521
522         u2_has_constr = 0;
523         for (i=0; i<u2->node_count; ++i) {
524                 arch_get_register_req(aenv, &req, u2->nodes[i], -1);
525                 if (arch_register_req_is(&req, limited)) {
526                         u2_has_constr = 1;
527                         break;
528                 }
529         }
530
531         if (u1_has_constr != u2_has_constr)
532                 return u2_has_constr - u1_has_constr;
533
534         /* Now check, whether the two units are connected */
535 #if 0
536         for (i=0; i<u1->node_count; ++i)
537                 for (o=0; o<u2->node_count; ++o)
538                         if (u1->nodes[i] == u2->nodes[o])
539                                 return 0;
540 #endif
541
542         /* After all, the sort key decides. Greater keys come first. */
543         return u2->sort_key - u1->sort_key;
544
545 }
546
547 /**
548  * Sort the ou's according to constraints and their sort_key
549  */
550 static void co_sort_units(copy_opt_t *co) {
551         int i, count = 0, costs;
552         unit_t *ou, **ous;
553
554         /* get the number of ous, remove them form the list and fill the array */
555         list_for_each_entry(unit_t, ou, &co->units, units)
556                 count++;
557         ous = alloca(count * sizeof(*ous));
558
559         costs = co_get_max_copy_costs(co);
560
561         i = 0;
562         list_for_each_entry(unit_t, ou, &co->units, units)
563                 ous[i++] = ou;
564
565         INIT_LIST_HEAD(&co->units);
566
567         assert(count == i && list_empty(&co->units));
568
569         for (i=0; i<count; ++i)
570                 ir_printf("%+F\n", ous[i]->nodes[0]);
571
572         qsort(ous, count, sizeof(*ous), compare_ous);
573
574         ir_printf("\n\n");
575         for (i=0; i<count; ++i)
576                 ir_printf("%+F\n", ous[i]->nodes[0]);
577
578         /* reinsert into list in correct order */
579         for (i=0; i<count; ++i)
580                 list_add_tail(&ous[i]->units, &co->units);
581
582         assert(costs == co_get_max_copy_costs(co));
583 }
584 #endif
585
586 void co_build_ou_structure(copy_opt_t *co) {
587         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
588         INIT_LIST_HEAD(&co->units);
589         irg_walk_graph(co->irg, co_collect_units, NULL, co);
590 #ifdef QUICK_AND_DIRTY_HACK
591         co_sort_units(co);
592 #endif
593 }
594
595 void co_free_ou_structure(copy_opt_t *co) {
596         unit_t *curr, *tmp;
597         ASSERT_OU_AVAIL(co);
598         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
599                 xfree(curr->nodes);
600                 xfree(curr->costs);
601                 xfree(curr);
602         }
603         co->units.next = NULL;
604 }
605
606 /* co_solve_heuristic() is implemented in becopyheur.c */
607
608 int co_get_max_copy_costs(const copy_opt_t *co) {
609         int i, res = 0;
610         unit_t *curr;
611
612         ASSERT_OU_AVAIL(co);
613
614         list_for_each_entry(unit_t, curr, &co->units, units) {
615                 res += curr->inevitable_costs;
616                 for (i=1; i<curr->node_count; ++i)
617                         res += curr->costs[i];
618         }
619         return res;
620 }
621
622 int co_get_inevit_copy_costs(const copy_opt_t *co) {
623         int res = 0;
624         unit_t *curr;
625
626         ASSERT_OU_AVAIL(co);
627
628         list_for_each_entry(unit_t, curr, &co->units, units)
629                 res += curr->inevitable_costs;
630         return res;
631 }
632
633 int co_get_copy_costs(const copy_opt_t *co) {
634         int i, res = 0;
635         unit_t *curr;
636
637         ASSERT_OU_AVAIL(co);
638
639         list_for_each_entry(unit_t, curr, &co->units, units) {
640                 int root_col = get_irn_col(co, curr->nodes[0]);
641                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
642                 res += curr->inevitable_costs;
643                 for (i=1; i<curr->node_count; ++i) {
644                         int arg_col = get_irn_col(co, curr->nodes[i]);
645                         if (root_col != arg_col) {
646                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
647                                 res += curr->costs[i];
648                         }
649                 }
650         }
651         return res;
652 }
653
654 int co_get_lower_bound(const copy_opt_t *co) {
655         int res = 0;
656         unit_t *curr;
657
658         ASSERT_OU_AVAIL(co);
659
660         list_for_each_entry(unit_t, curr, &co->units, units)
661                 res += curr->inevitable_costs + curr->min_nodes_costs;
662         return res;
663 }
664
665 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
666 {
667         bitset_t *seen = bitset_irg_malloc(co->irg);
668         affinity_node_t *an;
669
670         memset(stat, 0, sizeof(stat[0]));
671
672         /* count affinity edges. */
673         co_gs_foreach_aff_node(co, an) {
674                 neighb_t *neigh;
675                 stat->aff_nodes += 1;
676                 bitset_add_irn(seen, an->irn);
677                 co_gs_foreach_neighb(an, neigh) {
678                         if(!bitset_contains_irn(seen, neigh->irn)) {
679                                 stat->aff_edges += 1;
680                                 stat->max_costs += neigh->costs;
681
682                                 if(get_irn_col(co, an->irn) != get_irn_col(co, neigh->irn)) {
683                                         stat->costs += neigh->costs;
684                                         stat->unsatisfied_edges += 1;
685                                 }
686
687                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
688                                         stat->aff_int += 1;
689                                         stat->inevit_costs += neigh->costs;
690                                 }
691
692                         }
693                 }
694         }
695
696         bitset_free(seen);
697 }
698
699 /******************************************************************************
700    _____                 _        _____ _
701   / ____|               | |      / ____| |
702  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
703  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
704  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
705   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
706                   | |                                       __/ |
707                   |_|                                      |___/
708  ******************************************************************************/
709
710 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
711         const affinity_node_t *n1 = k1;
712         const affinity_node_t *n2 = k2;
713         (void) size;
714
715         return (n1->irn != n2->irn);
716 }
717
718 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
719         affinity_node_t new_node, *node;
720         neighb_t        *nbr;
721         int             allocnew = 1;
722
723         new_node.irn        = n1;
724         new_node.degree     = 0;
725         new_node.neighbours = NULL;
726         node = set_insert(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
727
728         for (nbr = node->neighbours; nbr; nbr = nbr->next)
729                 if (nbr->irn == n2) {
730                         allocnew = 0;
731                         break;
732                 }
733
734         /* if we did not find n2 in n1's neighbourhood insert it */
735         if (allocnew) {
736                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
737                 nbr->irn   = n2;
738                 nbr->costs = 0;
739                 nbr->next  = node->neighbours;
740
741                 node->neighbours = nbr;
742                 node->degree++;
743         }
744
745         /* now nbr points to n1's neighbour-entry of n2 */
746         nbr->costs += costs;
747 }
748
749 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
750         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
751                 add_edge(co, n1, n2, costs);
752                 add_edge(co, n2, n1, costs);
753         }
754 }
755
756 static void build_graph_walker(ir_node *irn, void *env) {
757         copy_opt_t *co = env;
758         int pos, max;
759         const arch_register_t *reg;
760
761         if (!is_curr_reg_class(co, irn) || arch_irn_is(co->aenv, irn, ignore))
762                 return;
763
764         reg = arch_get_irn_register(co->aenv, irn);
765         if (arch_register_type_is(reg, ignore))
766                 return;
767
768         if (is_Reg_Phi(irn)) { /* Phis */
769                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
770                         ir_node *arg = get_irn_n(irn, pos);
771                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
772                 }
773         }
774         else if (is_Perm_Proj(co->aenv, irn)) { /* Perms */
775                 ir_node *arg = get_Perm_src(irn);
776                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
777         }
778         else { /* 2-address code */
779                 const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
780                 if (is_2addr_code(req)) {
781                         const int *i;
782                         for (i = req->other_same; i != ENDOF(req->other_same); ++i) {
783                                 ir_node *other;
784
785                                 if (*i == -1) break;
786
787                                 other = get_irn_n(skip_Proj(irn), *i);
788                                 if (! arch_irn_is(co->aenv, other, ignore))
789                                         add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
790                         }
791                 }
792         }
793 }
794
795 void co_build_graph_structure(copy_opt_t *co) {
796         obstack_init(&co->obst);
797         co->nodes = new_set(compare_affinity_node_t, 32);
798
799         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
800 }
801
802 void co_free_graph_structure(copy_opt_t *co) {
803         ASSERT_GS_AVAIL(co);
804
805         del_set(co->nodes);
806         obstack_free(&co->obst, NULL);
807         co->nodes = NULL;
808 }
809
810 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
811
812 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
813         affinity_node_t new_node, *n;
814
815         ASSERT_GS_AVAIL(co);
816
817         new_node.irn = irn;
818         n = set_find(co->nodes, &new_node, sizeof(new_node), nodeset_hash(new_node.irn));
819         if (n) {
820                 return (n->degree > 0);
821         } else
822                 return 0;
823 }
824
825 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
826 {
827         be_ifg_t *ifg  = co->cenv->ifg;
828         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
829
830         ir_node *irn;
831         void *it, *nit;
832         int i, n, n_regs;
833
834         n_regs = 0;
835         for(i = 0; i < co->cls->n_regs; ++i) {
836                 const arch_register_t *reg = &co->cls->regs[i];
837                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
838         }
839
840         /*
841          * n contains the first node number.
842          * the values below n are the pre-colored register nodes
843          */
844
845         it  = be_ifg_nodes_iter_alloca(ifg);
846         nit = be_ifg_neighbours_iter_alloca(ifg);
847
848         n = n_regs;
849         be_ifg_foreach_node(ifg, it, irn) {
850                 if(!arch_irn_is(co->aenv, irn, ignore))
851                         set_irn_link(irn, INT_TO_PTR(n++));
852         }
853
854         fprintf(f, "%d %d\n", n, n_regs);
855
856         be_ifg_foreach_node(ifg, it, irn) {
857                 if(!arch_irn_is(co->aenv, irn, ignore)) {
858                         int idx            = PTR_TO_INT(get_irn_link(irn));
859                         affinity_node_t *a = get_affinity_info(co, irn);
860
861                         const arch_register_req_t *req;
862                         ir_node *adj;
863
864                         req = arch_get_register_req(co->aenv, irn, BE_OUT_POS(0));
865                         if(arch_register_req_is(req, limited)) {
866                                 for(i = 0; i < co->cls->n_regs; ++i) {
867                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
868                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
869                                 }
870                         }
871
872
873                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
874                                 if(!arch_irn_is(co->aenv, adj, ignore)) {
875                                         int adj_idx = PTR_TO_INT(get_irn_link(adj));
876                                         if(idx < adj_idx)
877                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
878                                 }
879                         }
880
881                         if(a) {
882                                 neighb_t *n;
883
884                                 co_gs_foreach_neighb(a, n) {
885                                         if(!arch_irn_is(co->aenv, n->irn, ignore)) {
886                                                 int n_idx = PTR_TO_INT(get_irn_link(n->irn));
887                                                 if(idx < n_idx)
888                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
889                                         }
890                                 }
891                         }
892                 }
893         }
894 }
895
896 typedef struct _appel_clique_walker_t {
897         ir_phase ph;
898         const copy_opt_t *co;
899         int curr_nr;
900         int node_count;
901         FILE *f;
902         int dumb;
903         int *color_map;
904         struct obstack obst;
905 } appel_clique_walker_t;
906
907 typedef struct _appel_block_info_t {
908         int *live_end_nr;
909         int *live_in_nr;
910         int *phi_nr;
911         ir_node **live_end;
912         ir_node **live_in;
913         ir_node **phi;
914         int n_live_end;
915         int n_live_in;
916         int n_phi;
917 } appel_block_info_t;
918
919 static int appel_aff_weight(const appel_clique_walker_t *env, ir_node *bl)
920 {
921 #if 0
922         double freq = get_block_execfreq(env->co->cenv->execfreq, bl);
923         int res = (int) freq;
924         return res == 0 ? 1 : res;
925 #else
926         ir_loop *loop = get_irn_loop(bl);
927         (void) env;
928         if(loop) {
929                 int d = get_loop_depth(loop);
930                 return 1 + d * d;
931         }
932         return 1;
933 #endif
934 }
935
936 static void *appel_clique_walker_irn_init(ir_phase *phase, ir_node *irn, void *old)
937 {
938         appel_block_info_t *res = NULL;
939         (void) old;
940
941         if(is_Block(irn)) {
942                 appel_clique_walker_t *d = (void *) phase;
943                 res = phase_alloc(phase, sizeof(res[0]));
944                 res->phi_nr      = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
945                 res->live_end_nr = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end_nr));
946                 res->live_in_nr  = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in_nr));
947                 res->live_end    = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_end));
948                 res->live_in     = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
949                 res->phi         = phase_alloc(phase, d->co->cls->n_regs * sizeof(res->live_in));
950         }
951
952         return res;
953 }
954
955 typedef struct _insn_list_t {
956         be_insn_t *insn;
957         struct list_head list;
958 } insn_list_t;
959
960 static int appel_get_live_end_nr(appel_clique_walker_t *env, ir_node *bl, ir_node *irn)
961 {
962         appel_block_info_t *bli = phase_get_irn_data(&env->ph, bl);
963         int i;
964
965         for(i = 0; i < bli->n_live_end; ++i)
966                 if(bli->live_end[i] == irn)
967                         return bli->live_end_nr[i];
968
969         return -1;
970 }
971
972 static int appel_dump_clique(appel_clique_walker_t *env, const ir_nodeset_t *live, ir_node *bl, int curr_nr, int start_nr)
973 {
974         ir_node **live_arr = alloca(env->co->cls->n_regs * sizeof(live_arr[0]));
975         ir_node *irn;
976         int n_live;
977         int j;
978         ir_nodeset_iterator_t iter;
979
980         n_live = 0;
981         foreach_ir_nodeset(live, irn, iter)
982                 live_arr[n_live++] = irn;
983
984         /* dump the live after clique */
985         if(!env->dumb) {
986                 for(j = 0; j < n_live; ++j) {
987                         int k;
988
989                         for(k = j + 1; k < n_live; ++k) {
990                                 fprintf(env->f, "%d %d -1 ", curr_nr + j, curr_nr + k);
991                         }
992                         fprintf(env->f, "\n");
993                 }
994         }
995
996         /* dump the affinities */
997         for(j = 0; !env->dumb && j < n_live; ++j) {
998                 ir_node *irn = live_arr[j];
999                 int old_nr = PTR_TO_INT(get_irn_link(irn));
1000
1001                 /* if the node was already live in the last insn dump the affinity */
1002                 if(old_nr > start_nr) {
1003                         int weight = appel_aff_weight(env, bl);
1004                         fprintf(env->f, "%d %d %d\n", old_nr, curr_nr + j, weight);
1005                 }
1006         }
1007
1008         /* set the current numbers into the link field. */
1009         for(j = 0; j < n_live; ++j) {
1010                 ir_node *irn = live_arr[j];
1011                 set_irn_link(irn, INT_TO_PTR(curr_nr + j));
1012         }
1013
1014         return curr_nr + n_live;
1015 }
1016
1017 static void appel_walker(ir_node *bl, void *data)
1018 {
1019         appel_clique_walker_t *env = data;
1020         appel_block_info_t *bli    = phase_get_or_set_irn_data(&env->ph, bl);
1021         struct obstack *obst       = &env->obst;
1022         void *base                 = obstack_base(obst);
1023         ir_nodeset_t live;
1024         ir_nodeset_iterator_t iter;
1025         be_lv_t *lv                = env->co->cenv->birg->lv;
1026
1027         int n_insns  = 0;
1028         int n_nodes  = 0;
1029         int start_nr = env->curr_nr;
1030         int curr_nr  = start_nr;
1031
1032         be_insn_env_t insn_env;
1033         int i, j;
1034         ir_node *irn;
1035         be_insn_t **insns;
1036
1037         insn_env.aenv = env->co->aenv;
1038         insn_env.cls  = env->co->cls;
1039         insn_env.obst = obst;
1040         insn_env.ignore_colors = env->co->cenv->ignore_colors;
1041
1042         /* Guess how many insns will be in this block. */
1043         sched_foreach(bl, irn)
1044                 n_nodes++;
1045
1046         bli->n_phi = 0;
1047         insns = xmalloc(n_nodes * sizeof(insns[0]));
1048
1049         /* Put all insns in an array. */
1050         irn = sched_first(bl);
1051         while(!sched_is_end(irn)) {
1052                 be_insn_t *insn;
1053                 insn = be_scan_insn(&insn_env, irn);
1054                 insns[n_insns++] = insn;
1055                 irn = insn->next_insn;
1056         }
1057
1058         DBG((dbg, LEVEL_2, "%+F\n", bl));
1059         ir_nodeset_init(&live);
1060         be_liveness_end_of_block(lv, env->co->aenv, env->co->cls, bl, &live);
1061
1062         /* Generate the bad and ugly. */
1063         for(i = n_insns - 1; i >= 0; --i) {
1064                 be_insn_t *insn = insns[i];
1065
1066                 /* The first live set has to be saved in the block border set. */
1067                 if(i == n_insns - 1) {
1068                         j = 0;
1069                         foreach_ir_nodeset(&live, irn, iter) {
1070                                 bli->live_end[j]    = irn;
1071                                 bli->live_end_nr[j] = curr_nr + j;
1072                                 ++j;
1073                         }
1074                         bli->n_live_end = j;
1075                 }
1076
1077                 if(!env->dumb) {
1078                         for(j = 0; j < insn->use_start; ++j) {
1079                                 ir_node *op   = insn->ops[j].carrier;
1080                                 bitset_t *adm = insn->ops[j].regs;
1081                                 int k;
1082                                 size_t nr;
1083
1084                                 if(!insn->ops[j].has_constraints)
1085                                         continue;
1086
1087                                 nr = 0;
1088                                 foreach_ir_nodeset(&live, irn, iter) {
1089                                         if(irn == op) {
1090                                                 break;
1091                                         }
1092                                         ++nr;
1093                                 }
1094
1095                                 assert(nr < ir_nodeset_size(&live));
1096
1097                                 for(k = 0; k < env->co->cls->n_regs; ++k) {
1098                                         int mapped_col = env->color_map[k];
1099                                         if(mapped_col >= 0 && !bitset_is_set(adm, k) && !bitset_is_set(env->co->cenv->ignore_colors, k))
1100                                                 fprintf(env->f, "%d %d -1\n", curr_nr + nr, mapped_col);
1101                                 }
1102                         }
1103                 }
1104
1105                 /* dump the clique and update the stuff. */
1106                 curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1107
1108                 /* remove all defs. */
1109                 for(j = 0; j < insn->use_start; ++j)
1110                         ir_nodeset_remove(&live, insn->ops[j].carrier);
1111
1112                 if(is_Phi(insn->irn) && arch_irn_consider_in_reg_alloc(env->co->aenv, env->co->cls, insn->irn)) {
1113                         bli->phi[bli->n_phi]    = insn->irn;
1114                         bli->phi_nr[bli->n_phi] = PTR_TO_INT(get_irn_link(insn->irn));
1115                         bli->n_phi++;
1116                 }
1117
1118                 /* add all uses */
1119                 else
1120                         for(j = insn->use_start; j < insn->n_ops; ++j)
1121                                 ir_nodeset_insert(&live, insn->ops[j].carrier);
1122         }
1123
1124         /* print the start clique. */
1125         curr_nr = appel_dump_clique(env, &live, bl, curr_nr, start_nr);
1126
1127         i = 0;
1128         foreach_ir_nodeset(&live, irn, iter) {
1129                 bli->live_in[i]    = irn;
1130                 bli->live_in_nr[i] = PTR_TO_INT(get_irn_link(irn));
1131                 ++i;
1132         }
1133         bli->n_live_in = i;
1134
1135         ir_nodeset_destroy(&live);
1136         free(insns);
1137         obstack_free(obst, base);
1138         env->curr_nr = curr_nr;
1139 }
1140
1141 static void appel_inter_block_aff(ir_node *bl, void *data)
1142 {
1143         appel_clique_walker_t *env = data;
1144         appel_block_info_t *bli    = phase_get_irn_data(&env->ph, bl);
1145
1146         int i, j, n;
1147
1148         for(i = 0; i < bli->n_live_in; ++i) {
1149                 ir_node *irn = bli->live_in[i];
1150
1151                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1152                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1153
1154                         int nr = appel_get_live_end_nr(env, pred, irn);
1155                         assert(nr >= 0);
1156                         fprintf(env->f, "%d %d 1\n", bli->live_in_nr[i], nr);
1157                 }
1158         }
1159
1160         for(i = 0; i < bli->n_phi; ++i) {
1161                 ir_node *irn = bli->phi[i];
1162
1163                 for(j = 0, n = get_Block_n_cfgpreds(bl); j < n; ++j) {
1164                         ir_node *pred  = get_Block_cfgpred_block(bl, j);
1165                         ir_node *op = get_irn_n(irn, j);
1166
1167                         int nr = appel_get_live_end_nr(env, pred, op);
1168                         assert(nr >= 0);
1169                         fprintf(env->f, "%d %d 1\n", bli->phi_nr[i], nr);
1170                 }
1171         }
1172
1173 }
1174
1175 void co_dump_appel_graph_cliques(const copy_opt_t *co, FILE *f)
1176 {
1177         int i;
1178         int n_colors;
1179         appel_clique_walker_t env;
1180         bitset_t *adm = bitset_alloca(co->cls->n_regs);
1181         be_lv_t *lv = co->cenv->birg->lv;
1182
1183         be_liveness_recompute(lv);
1184         obstack_init(&env.obst);
1185         phase_init(&env.ph, "appel_clique_dumper", co->irg, PHASE_DEFAULT_GROWTH, appel_clique_walker_irn_init, NULL);
1186         env.curr_nr = co->cls->n_regs;
1187         env.co = co;
1188         env.f = f;
1189
1190         bitset_copy(adm, co->cenv->ignore_colors);
1191         bitset_flip_all(adm);
1192
1193         /* Make color map. */
1194         env.color_map = alloca(co->cls->n_regs * sizeof(env.color_map[0]));
1195         for(i = 0, n_colors = 0; i < co->cls->n_regs; ++i) {
1196                 const arch_register_t *reg = &co->cls->regs[i];
1197                 env.color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_colors++;
1198         }
1199
1200         env.dumb = 1;
1201         env.curr_nr = n_colors;
1202         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1203         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1204
1205         fprintf(f, "%d %d\n", env.curr_nr, n_colors);
1206
1207         /* make the first k nodes interfere */
1208         for(i = 0; i < n_colors; ++i) {
1209                 int j;
1210                 for(j = i + 1; j < n_colors; ++j)
1211                         fprintf(f, "%d %d -1 ", i, j);
1212                 fprintf(f, "\n");
1213         }
1214
1215         env.dumb = 0;
1216         env.curr_nr = n_colors;
1217         irg_block_walk_graph(co->irg, firm_clear_link, NULL, NULL);
1218         irg_block_walk_graph(co->irg, appel_walker, NULL, &env);
1219         irg_block_walk_graph(co->irg, appel_inter_block_aff, NULL, &env);
1220         obstack_free(&env.obst, NULL);
1221 }
1222
1223 /*
1224          ___ _____ ____   ____   ___ _____   ____                        _
1225         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
1226          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
1227          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
1228         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
1229                                                                   |_|            |___/
1230 */
1231
1232 static const char *get_dot_color_name(size_t col)
1233 {
1234         static const char *names[] = {
1235                 "blue",
1236                 "red",
1237                 "green",
1238                 "yellow",
1239                 "cyan",
1240                 "magenta",
1241                 "orange",
1242                 "chocolate",
1243                 "beige",
1244                 "navy",
1245                 "darkgreen",
1246                 "darkred",
1247                 "lightPink",
1248                 "chartreuse",
1249                 "lightskyblue",
1250                 "linen",
1251                 "pink",
1252                 "lightslateblue",
1253                 "mintcream",
1254                 "red",
1255                 "darkolivegreen",
1256                 "mediumblue",
1257                 "mistyrose",
1258                 "salmon",
1259                 "darkseagreen",
1260                 "mediumslateblue"
1261                 "moccasin",
1262                 "tomato",
1263                 "forestgreen",
1264                 "darkturquoise",
1265                 "palevioletred"
1266         };
1267
1268         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1269 }
1270
1271 typedef struct _co_ifg_dump_t {
1272         const copy_opt_t *co;
1273         unsigned flags;
1274 } co_ifg_dump_t;
1275
1276 static void ifg_dump_graph_attr(FILE *f, void *self)
1277 {
1278         (void) self;
1279         fprintf(f, "overlap=scale");
1280 }
1281
1282 static int ifg_is_dump_node(void *self, ir_node *irn)
1283 {
1284         co_ifg_dump_t *cod = self;
1285         return !arch_irn_is(cod->co->aenv, irn, ignore);
1286 }
1287
1288 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1289 {
1290         co_ifg_dump_t *env         = self;
1291         const arch_register_t *reg = arch_get_irn_register(env->co->aenv, irn);
1292         const arch_register_req_t *req;
1293         int limited;
1294
1295         req = arch_get_register_req(env->co->aenv, irn, BE_OUT_POS(0));
1296         limited = arch_register_req_is(req, limited);
1297
1298         if(env->flags & CO_IFG_DUMP_LABELS) {
1299                 ir_fprintf(f, "label=\"%+F", irn);
1300
1301 #if 0
1302                 // TODO fix this...
1303                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1304                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1305                         req.limited(req.limited_env, bs);
1306                         ir_fprintf(f, "\\n%B", bs);
1307                 }
1308 #endif
1309                 ir_fprintf(f, "\" ");
1310         } else {
1311                 fprintf(f, "label=\"\" shape=point " );
1312         }
1313
1314         if(env->flags & CO_IFG_DUMP_SHAPE)
1315                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1316
1317         if(env->flags & CO_IFG_DUMP_COLORS)
1318                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1319 }
1320
1321 static void ifg_dump_at_end(FILE *file, void *self)
1322 {
1323         co_ifg_dump_t *env = self;
1324         affinity_node_t *a;
1325
1326         co_gs_foreach_aff_node(env->co, a) {
1327                 const arch_register_t *ar = arch_get_irn_register(env->co->aenv, a->irn);
1328                 unsigned aidx = get_irn_idx(a->irn);
1329                 neighb_t *n;
1330
1331                 co_gs_foreach_neighb(a, n) {
1332                         const arch_register_t *nr = arch_get_irn_register(env->co->aenv, n->irn);
1333                         unsigned nidx = get_irn_idx(n->irn);
1334
1335                         if(aidx < nidx) {
1336                                 const char *color = nr == ar ? "blue" : "red";
1337                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1338                                 if(env->flags & CO_IFG_DUMP_LABELS)
1339                                         fprintf(file, "label=\"%d\" ", n->costs);
1340                                 if(env->flags & CO_IFG_DUMP_COLORS)
1341                                         fprintf(file, "color=%s ", color);
1342                                 else
1343                                         fprintf(file, "style=dotted");
1344                                 fprintf(file, "];\n");
1345                         }
1346                 }
1347         }
1348 }
1349
1350
1351 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1352         ifg_is_dump_node,
1353         ifg_dump_graph_attr,
1354         ifg_dump_node_attr,
1355         NULL,
1356         NULL,
1357         ifg_dump_at_end
1358 };
1359
1360
1361
1362 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1363 {
1364         co_ifg_dump_t cod;
1365
1366         cod.co    = co;
1367         cod.flags = flags;
1368         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1369 }
1370
1371
1372 void co_solve_park_moon(copy_opt_t *opt)
1373 {
1374         (void) opt;
1375 }
1376
1377 static int void_algo(copy_opt_t *co)
1378 {
1379         (void) co;
1380         return 0;
1381 }
1382
1383 /*
1384                 _    _                  _ _   _
1385            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1386           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1387          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1388         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1389                            |___/
1390 */
1391
1392 typedef struct {
1393         co_algo_t  *algo;
1394         const char *name;
1395         int        can_improve_existing;
1396 } co_algo_info_t;
1397
1398 static co_algo_info_t algos[] = {
1399         { void_algo,               "none",  0 },
1400         { co_solve_heuristic,      "heur1", 0 },
1401         { co_solve_heuristic_new,  "heur2", 0 },
1402         { co_solve_heuristic_java, "heur3", 0 },
1403         { co_solve_heuristic_mst,  "heur4", 0 },
1404 #ifdef WITH_ILP
1405         { co_solve_ilp2,           "ilp",   1 },
1406 #endif
1407         { NULL,                    "",      0 }
1408 };
1409
1410 /*
1411     __  __       _         ____       _
1412    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1413    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1414    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1415    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1416
1417 */
1418
1419 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1420 {
1421         FILE *result;
1422         char buf[1024];
1423
1424         ir_snprintf(buf, sizeof(buf), "%s%F_%s%s", prefix, env->irg, env->cls->name, suffix);
1425         result = fopen(buf, "wt");
1426         if(result == NULL) {
1427                 panic("Couldn't open '%s' for writing.", buf);
1428         }
1429
1430         return result;
1431 }
1432
1433 void co_driver(be_chordal_env_t *cenv)
1434 {
1435         lc_timer_t          *timer = lc_timer_register("firm.be.copyopt", "runtime");
1436         co_complete_stats_t before, after;
1437         copy_opt_t          *co;
1438         co_algo_t           *algo_func;
1439         int                 was_optimal = 0;
1440
1441         if (algo >= CO_ALGO_LAST)
1442                 return;
1443
1444         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1445
1446         co = new_copy_opt(cenv, cost_func);
1447         co_build_ou_structure(co);
1448         co_build_graph_structure(co);
1449
1450         co_complete_stats(co, &before);
1451
1452         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1453         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1454         be_stat_ev_ull("co_max_costs",    before.max_costs);
1455         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1456         be_stat_ev_ull("co_aff_int",      before.aff_int);
1457
1458         be_stat_ev_ull("co_init_costs",   before.costs);
1459         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1460
1461         /* Dump the interference graph in Appel's format. */
1462         if (dump_flags & DUMP_APPEL) {
1463                 FILE *f = my_open(cenv, "", ".apl");
1464                 co_dump_appel_graph(co, f);
1465                 fclose(f);
1466         }
1467
1468         if (dump_flags & DUMP_BEFORE) {
1469                 FILE *f = my_open(cenv, "", "-before.dot");
1470                 co_dump_ifg_dot(co, f, style_flags);
1471                 fclose(f);
1472         }
1473
1474         /* if the algo can improve results, provide an initial solution with heur3 */
1475         if (improve && algos[algo].can_improve_existing) {
1476                 co_complete_stats_t stats;
1477
1478                 /* produce a heuristic solution */
1479 #ifdef WITH_JVM
1480                 co_solve_heuristic_java(co);
1481 #else
1482                 co_solve_heuristic(co);
1483 #endif /* WITH_JVM */
1484
1485                 /* do the stats and provide the current costs */
1486                 co_complete_stats(co, &stats);
1487                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1488         }
1489
1490 #ifdef WITH_JVM
1491         /* start the JVM here so that it does not tamper the timing. */
1492         if (algo == CO_ALGO_HEUR3)
1493                 be_java_coal_start_jvm();
1494 #endif /* WITH_JVM */
1495
1496         algo_func = algos[algo].algo;
1497
1498         /* perform actual copy minimization */
1499         lc_timer_reset_and_start(timer);
1500         was_optimal = algo_func(co);
1501         lc_timer_stop(timer);
1502
1503         be_stat_ev("co_time", lc_timer_elapsed_msec(timer));
1504         be_stat_ev_ull("co_optimal", was_optimal);
1505
1506         if (dump_flags & DUMP_AFTER) {
1507                 FILE *f = my_open(cenv, "", "-after.dot");
1508                 co_dump_ifg_dot(co, f, style_flags);
1509                 fclose(f);
1510         }
1511
1512         co_complete_stats(co, &after);
1513
1514         if (do_stats) {
1515                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1516                 ulong64 evitable          = after.costs     - after.inevit_costs;
1517
1518                 ir_printf("%30F ", cenv->irg);
1519                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1520
1521                 if(optimizable_costs > 0)
1522                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1523                 else
1524                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1525         }
1526
1527         be_stat_ev_ull("co_after_costs", after.costs);
1528         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1529
1530         co_free_graph_structure(co);
1531         co_free_ou_structure(co);
1532         free_copy_opt(co);
1533 }