b9b1aa24b8f265ef43269ad9e9e26f3a1eb3acd6
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode_t.h"
52 #include "beutil.h"
53 #include "beifg_t.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static unsigned   algo        = CO_ALGO_HEUR4;
81 static int        improve     = 1;
82
83 static const lc_opt_enum_mask_items_t dump_items[] = {
84         { "before",  DUMP_BEFORE },
85         { "after",   DUMP_AFTER  },
86         { "appel",   DUMP_APPEL  },
87         { "all",     DUMP_ALL    },
88         { NULL,      0 }
89 };
90
91 static const lc_opt_enum_mask_items_t style_items[] = {
92         { "color",   CO_IFG_DUMP_COLORS },
93         { "labels",  CO_IFG_DUMP_LABELS },
94         { "constr",  CO_IFG_DUMP_CONSTR },
95         { "shape",   CO_IFG_DUMP_SHAPE  },
96         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
97         { NULL,      0 }
98 };
99
100 static const lc_opt_enum_mask_items_t algo_items[] = {
101         { "none",   CO_ALGO_NONE  },
102         { "heur",   CO_ALGO_HEUR  },
103         { "heur2",  CO_ALGO_HEUR2 },
104         { "heur4",  CO_ALGO_HEUR4 },
105         { "ilp",    CO_ALGO_ILP   },
106 #ifdef FIRM_KAPS
107         { "pbqp",   CO_ALGO_PBQP  },
108 #endif
109         { NULL,     0 }
110 };
111
112 typedef int (*opt_funcptr)(void);
113
114 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
115         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
116         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
117         { "one",    (opt_funcptr) co_get_costs_all_one },
118         { NULL,     NULL }
119 };
120
121 static lc_opt_enum_mask_var_t dump_var = {
122         &dump_flags, dump_items
123 };
124
125 static lc_opt_enum_mask_var_t style_var = {
126         &style_flags, style_items
127 };
128
129 static lc_opt_enum_mask_var_t algo_var = {
130         &algo, algo_items
131 };
132
133 static lc_opt_enum_func_ptr_var_t cost_func_var = {
134         (opt_funcptr*) &cost_func, cost_func_items
135 };
136
137 static const lc_opt_table_entry_t options[] = {
138         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
139         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
140         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
141         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
142         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
143         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
144         LC_OPT_LAST
145 };
146
147 void be_init_copyopt(void)
148 {
149         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
150         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
151         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
152         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
153
154         lc_opt_add_table(co_grp, options);
155 }
156
157 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
158
159 #undef QUICK_AND_DIRTY_HACK
160
161 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
162 {
163         if (env->ifg)
164                 return be_ifg_connected(env->ifg, a, b);
165         else
166                 return be_values_interfere(env->birg->lv, a, b);
167 }
168
169
170 /******************************************************************************
171     _____                           _
172    / ____|                         | |
173   | |  __  ___ _ __   ___ _ __ __ _| |
174   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
175   | |__| |  __/ | | |  __/ | | (_| | |
176    \_____|\___|_| |_|\___|_|  \__,_|_|
177
178  ******************************************************************************/
179
180 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
181
182
183 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
184 {
185         const char *s1, *s2, *s3;
186         int len;
187         copy_opt_t *co;
188
189         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
190
191         co = XMALLOCZ(copy_opt_t);
192         co->cenv      = chordal_env;
193         co->irg       = chordal_env->irg;
194         co->cls       = chordal_env->cls;
195         co->get_costs = get_costs;
196
197         s1 = get_irp_name();
198         s2 = get_entity_name(get_irg_entity(co->irg));
199         s3 = chordal_env->cls->name;
200         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
201         co->name = XMALLOCN(char, len);
202         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
203
204         return co;
205 }
206
207 void free_copy_opt(copy_opt_t *co) {
208         xfree(co->name);
209         free(co);
210 }
211
212 /**
213  * Checks if a node is optimizable, viz. has something to do with coalescing
214  * @param irn  The irn to check
215  */
216 static int co_is_optimizable_root(ir_node *irn)
217 {
218         const arch_register_req_t *req;
219         const arch_register_t     *reg;
220
221         if (arch_irn_is_ignore(irn))
222                 return 0;
223
224         reg = arch_get_irn_register(irn);
225         if (arch_register_type_is(reg, ignore))
226                 return 0;
227
228         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
229                 return 1;
230
231         req = arch_get_register_req_out(irn);
232         if (is_2addr_code(req))
233                 return 1;
234
235         return 0;
236 }
237
238 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
239         int cost = 0;
240         ir_loop *loop;
241         ir_node *root_block = get_nodes_block(root);
242         (void) co;
243         (void) arg;
244
245         if (is_Phi(root)) {
246                 /* for phis the copies are placed in the corresponding pred-block */
247                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
248         } else {
249                 /* a perm places the copy in the same block as it resides */
250                 loop = get_irn_loop(root_block);
251         }
252         if (loop) {
253                 int d = get_loop_depth(loop);
254                 cost = d*d;
255         }
256         return 1+cost;
257 }
258
259 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
260         int res;
261         ir_node *root_bl = get_nodes_block(root);
262         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
263         (void) arg;
264         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
265
266         /* don't allow values smaller than one. */
267         return res < 1 ? 1 : res;
268 }
269
270
271 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
272         (void) co;
273         (void) root;
274         (void) arg;
275         (void) pos;
276         return 1;
277 }
278
279 /******************************************************************************
280    ____        _   _    _       _ _          _____ _
281   / __ \      | | | |  | |     (_) |        / ____| |
282  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
283  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
284  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
285   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
286         | |                                                            __/ |
287         |_|                                                           |___/
288  ******************************************************************************/
289
290 /**
291  * Determines a maximum weighted independent set with respect to
292  * the interference and conflict edges of all nodes in a qnode.
293  */
294 static int ou_max_ind_set_costs(unit_t *ou) {
295         be_chordal_env_t *chordal_env = ou->co->cenv;
296         ir_node **safe, **unsafe;
297         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
298         bitset_t *curr;
299         bitset_pos_t pos;
300         int max, curr_weight, best_weight = 0;
301
302         /* assign the nodes into two groups.
303          * safe: node has no interference, hence it is in every max stable set.
304          * unsafe: node has an interference
305          */
306         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
307         safe_costs   = 0;
308         safe_count   = 0;
309         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
310         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
311         unsafe_count = 0;
312         for(i=1; i<ou->node_count; ++i) {
313                 int is_safe = 1;
314                 for(o=1; o<ou->node_count; ++o) {
315                         if (i==o)
316                                 continue;
317                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
318                                 unsafe_costs[unsafe_count] = ou->costs[i];
319                                 unsafe[unsafe_count] = ou->nodes[i];
320                                 ++unsafe_count;
321                                 is_safe = 0;
322                                 break;
323                         }
324                 }
325                 if (is_safe) {
326                         safe_costs += ou->costs[i];
327                         safe[safe_count++] = ou->nodes[i];
328                 }
329         }
330
331
332         /* now compute the best set out of the unsafe nodes*/
333         if (unsafe_count > MIS_HEUR_TRIGGER) {
334                 bitset_t *best = bitset_alloca(unsafe_count);
335                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
336                 for (i=0; i<unsafe_count; ++i) {
337                         bitset_set(best, i);
338                         /* check if it is a stable set */
339                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
340                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
341                                         bitset_clear(best, i); /* clear the bit and try next one */
342                                         break;
343                                 }
344                 }
345                 /* compute the weight */
346                 bitset_foreach(best, pos)
347                         best_weight += unsafe_costs[pos];
348         } else {
349                 /* Exact Algorithm: Brute force */
350                 curr = bitset_alloca(unsafe_count);
351                 bitset_set_all(curr);
352                 while ((max = bitset_popcnt(curr)) != 0) {
353                         /* check if curr is a stable set */
354                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
355                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
356                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
357                                                         goto no_stable_set;
358
359                         /* if we arrive here, we have a stable set */
360                         /* compute the weigth of the stable set*/
361                         curr_weight = 0;
362                         bitset_foreach(curr, pos)
363                                 curr_weight += unsafe_costs[pos];
364
365                         /* any better ? */
366                         if (curr_weight > best_weight) {
367                                 best_weight = curr_weight;
368                         }
369
370         no_stable_set:
371                         bitset_minus1(curr);
372                 }
373         }
374
375         return safe_costs+best_weight;
376 }
377
378 static void co_collect_units(ir_node *irn, void *env)
379 {
380         const arch_register_req_t *req = arch_get_register_req_out(irn);
381         copy_opt_t                *co  = env;
382         unit_t *unit;
383
384         if (req->cls != co->cls)
385                 return;
386         if (!co_is_optimizable_root(irn))
387                 return;
388
389         /* Init a new unit */
390         unit = XMALLOCZ(unit_t);
391         unit->co = co;
392         unit->node_count = 1;
393         INIT_LIST_HEAD(&unit->queue);
394
395         /* Phi with some/all of its arguments */
396         if (is_Reg_Phi(irn)) {
397                 int i, arity;
398
399                 /* init */
400                 arity = get_irn_arity(irn);
401                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
402                 unit->costs = XMALLOCN(int,      arity + 1);
403                 unit->nodes[0] = irn;
404
405                 /* fill */
406                 for (i=0; i<arity; ++i) {
407                         int o, arg_pos;
408                         ir_node *arg = get_irn_n(irn, i);
409
410                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
411                         if (arg == irn)
412                                 continue;
413                         if (nodes_interfere(co->cenv, irn, arg)) {
414                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
415                                 continue;
416                         }
417
418                         /* Else insert the argument of the phi to the members of this ou */
419                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
420
421                         if (arch_irn_is_ignore(arg))
422                                 continue;
423
424                         /* Check if arg has occurred at a prior position in the arg/list */
425                         arg_pos = 0;
426                         for (o=1; o<unit->node_count; ++o) {
427                                 if (unit->nodes[o] == arg) {
428                                         arg_pos = o;
429                                         break;
430                                 }
431                         }
432
433                         if (!arg_pos) { /* a new argument */
434                                 /* insert node, set costs */
435                                 unit->nodes[unit->node_count] = arg;
436                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
437                                 unit->node_count++;
438                         } else { /* arg has occurred before in same phi */
439                                 /* increase costs for existing arg */
440                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
441                         }
442                 }
443                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
444                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
445         } else if (is_Perm_Proj(irn)) {
446                 /* Proj of a perm with corresponding arg */
447                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
448                 unit->nodes = XMALLOCN(ir_node*, 2);
449                 unit->costs = XMALLOCN(int,      2);
450                 unit->node_count = 2;
451                 unit->nodes[0] = irn;
452                 unit->nodes[1] = get_Perm_src(irn);
453                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
454         } else {
455                 /* Src == Tgt of a 2-addr-code instruction */
456                 if (is_2addr_code(req)) {
457                         const unsigned other = req->other_same;
458                         int            count = 0;
459                         int            i;
460
461                         for (i = 0; (1U << i) <= other; ++i) {
462                                 if (other & (1U << i)) {
463                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
464                                         if (arch_irn_is_ignore(o))
465                                                 continue;
466                                         if (nodes_interfere(co->cenv, irn, o))
467                                                 continue;
468                                         ++count;
469                                 }
470                         }
471
472                         if (count != 0) {
473                                 int k = 0;
474                                 ++count;
475                                 unit->nodes = XMALLOCN(ir_node*, count);
476                                 unit->costs = XMALLOCN(int,      count);
477                                 unit->node_count = count;
478                                 unit->nodes[k++] = irn;
479
480                                 for (i = 0; 1U << i <= other; ++i) {
481                                         if (other & (1U << i)) {
482                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
483                                                 if (!arch_irn_is_ignore(o) &&
484                                                                 !nodes_interfere(co->cenv, irn, o)) {
485                                                         unit->nodes[k] = o;
486                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
487                                                         ++k;
488                                                 }
489                                         }
490                                 }
491                         }
492                 } else {
493                         assert(0 && "This is not an optimizable node!");
494                 }
495         }
496
497         /* Insert the new unit at a position according to its costs */
498         if (unit->node_count > 1) {
499                 int i;
500                 struct list_head *tmp;
501
502                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
503                 for(i=1; i<unit->node_count; ++i) {
504                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
505                         unit->all_nodes_costs += unit->costs[i];
506                 }
507
508                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
509                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
510                 /* Insert the new ou according to its sort_key */
511                 tmp = &co->units;
512                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
513                         tmp = tmp->next;
514                 list_add(&unit->units, tmp);
515         } else {
516                 free(unit);
517         }
518 }
519
520 #ifdef QUICK_AND_DIRTY_HACK
521
522 static int compare_ous(const void *k1, const void *k2) {
523         const unit_t *u1 = *((const unit_t **) k1);
524         const unit_t *u2 = *((const unit_t **) k2);
525         int i, o, u1_has_constr, u2_has_constr;
526         arch_register_req_t req;
527
528         /* Units with constraints come first */
529         u1_has_constr = 0;
530         for (i=0; i<u1->node_count; ++i) {
531                 arch_get_register_req_out(&req, u1->nodes[i]);
532                 if (arch_register_req_is(&req, limited)) {
533                         u1_has_constr = 1;
534                         break;
535                 }
536         }
537
538         u2_has_constr = 0;
539         for (i=0; i<u2->node_count; ++i) {
540                 arch_get_register_req_out(&req, u2->nodes[i]);
541                 if (arch_register_req_is(&req, limited)) {
542                         u2_has_constr = 1;
543                         break;
544                 }
545         }
546
547         if (u1_has_constr != u2_has_constr)
548                 return u2_has_constr - u1_has_constr;
549
550         /* Now check, whether the two units are connected */
551 #if 0
552         for (i=0; i<u1->node_count; ++i)
553                 for (o=0; o<u2->node_count; ++o)
554                         if (u1->nodes[i] == u2->nodes[o])
555                                 return 0;
556 #endif
557
558         /* After all, the sort key decides. Greater keys come first. */
559         return u2->sort_key - u1->sort_key;
560
561 }
562
563 /**
564  * Sort the ou's according to constraints and their sort_key
565  */
566 static void co_sort_units(copy_opt_t *co) {
567         int i, count = 0, costs;
568         unit_t *ou, **ous;
569
570         /* get the number of ous, remove them form the list and fill the array */
571         list_for_each_entry(unit_t, ou, &co->units, units)
572                 count++;
573         ous = ALLOCAN(unit_t, count);
574
575         costs = co_get_max_copy_costs(co);
576
577         i = 0;
578         list_for_each_entry(unit_t, ou, &co->units, units)
579                 ous[i++] = ou;
580
581         INIT_LIST_HEAD(&co->units);
582
583         assert(count == i && list_empty(&co->units));
584
585         for (i=0; i<count; ++i)
586                 ir_printf("%+F\n", ous[i]->nodes[0]);
587
588         qsort(ous, count, sizeof(*ous), compare_ous);
589
590         ir_printf("\n\n");
591         for (i=0; i<count; ++i)
592                 ir_printf("%+F\n", ous[i]->nodes[0]);
593
594         /* reinsert into list in correct order */
595         for (i=0; i<count; ++i)
596                 list_add_tail(&ous[i]->units, &co->units);
597
598         assert(costs == co_get_max_copy_costs(co));
599 }
600 #endif
601
602 void co_build_ou_structure(copy_opt_t *co) {
603         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
604         INIT_LIST_HEAD(&co->units);
605         irg_walk_graph(co->irg, co_collect_units, NULL, co);
606 #ifdef QUICK_AND_DIRTY_HACK
607         co_sort_units(co);
608 #endif
609 }
610
611 void co_free_ou_structure(copy_opt_t *co) {
612         unit_t *curr, *tmp;
613         ASSERT_OU_AVAIL(co);
614         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
615                 xfree(curr->nodes);
616                 xfree(curr->costs);
617                 xfree(curr);
618         }
619         co->units.next = NULL;
620 }
621
622 /* co_solve_heuristic() is implemented in becopyheur.c */
623
624 int co_get_max_copy_costs(const copy_opt_t *co) {
625         int i, res = 0;
626         unit_t *curr;
627
628         ASSERT_OU_AVAIL(co);
629
630         list_for_each_entry(unit_t, curr, &co->units, units) {
631                 res += curr->inevitable_costs;
632                 for (i=1; i<curr->node_count; ++i)
633                         res += curr->costs[i];
634         }
635         return res;
636 }
637
638 int co_get_inevit_copy_costs(const copy_opt_t *co) {
639         int res = 0;
640         unit_t *curr;
641
642         ASSERT_OU_AVAIL(co);
643
644         list_for_each_entry(unit_t, curr, &co->units, units)
645                 res += curr->inevitable_costs;
646         return res;
647 }
648
649 int co_get_copy_costs(const copy_opt_t *co) {
650         int i, res = 0;
651         unit_t *curr;
652
653         ASSERT_OU_AVAIL(co);
654
655         list_for_each_entry(unit_t, curr, &co->units, units) {
656                 int root_col = get_irn_col(curr->nodes[0]);
657                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
658                 res += curr->inevitable_costs;
659                 for (i=1; i<curr->node_count; ++i) {
660                         int arg_col = get_irn_col(curr->nodes[i]);
661                         if (root_col != arg_col) {
662                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
663                                 res += curr->costs[i];
664                         }
665                 }
666         }
667         return res;
668 }
669
670 int co_get_lower_bound(const copy_opt_t *co) {
671         int res = 0;
672         unit_t *curr;
673
674         ASSERT_OU_AVAIL(co);
675
676         list_for_each_entry(unit_t, curr, &co->units, units)
677                 res += curr->inevitable_costs + curr->min_nodes_costs;
678         return res;
679 }
680
681 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
682 {
683         bitset_t *seen = bitset_irg_malloc(co->irg);
684         affinity_node_t *an;
685
686         memset(stat, 0, sizeof(stat[0]));
687
688         /* count affinity edges. */
689         co_gs_foreach_aff_node(co, an) {
690                 neighb_t *neigh;
691                 stat->aff_nodes += 1;
692                 bitset_add_irn(seen, an->irn);
693                 co_gs_foreach_neighb(an, neigh) {
694                         if(!bitset_contains_irn(seen, neigh->irn)) {
695                                 stat->aff_edges += 1;
696                                 stat->max_costs += neigh->costs;
697
698                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
699                                         stat->costs += neigh->costs;
700                                         stat->unsatisfied_edges += 1;
701                                 }
702
703                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
704                                         stat->aff_int += 1;
705                                         stat->inevit_costs += neigh->costs;
706                                 }
707
708                         }
709                 }
710         }
711
712         bitset_free(seen);
713 }
714
715 /******************************************************************************
716    _____                 _        _____ _
717   / ____|               | |      / ____| |
718  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
719  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
720  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
721   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
722                   | |                                       __/ |
723                   |_|                                      |___/
724  ******************************************************************************/
725
726 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
727         const affinity_node_t *n1 = k1;
728         const affinity_node_t *n2 = k2;
729         (void) size;
730
731         return (n1->irn != n2->irn);
732 }
733
734 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
735         affinity_node_t new_node, *node;
736         neighb_t        *nbr;
737         int             allocnew = 1;
738
739         new_node.irn        = n1;
740         new_node.degree     = 0;
741         new_node.neighbours = NULL;
742         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
743
744         for (nbr = node->neighbours; nbr; nbr = nbr->next)
745                 if (nbr->irn == n2) {
746                         allocnew = 0;
747                         break;
748                 }
749
750         /* if we did not find n2 in n1's neighbourhood insert it */
751         if (allocnew) {
752                 nbr        = OALLOC(&co->obst, neighb_t);
753                 nbr->irn   = n2;
754                 nbr->costs = 0;
755                 nbr->next  = node->neighbours;
756
757                 node->neighbours = nbr;
758                 node->degree++;
759         }
760
761         /* now nbr points to n1's neighbour-entry of n2 */
762         nbr->costs += costs;
763 }
764
765 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
766         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
767                 add_edge(co, n1, n2, costs);
768                 add_edge(co, n2, n1, costs);
769         }
770 }
771
772 static void build_graph_walker(ir_node *irn, void *env) {
773         const arch_register_req_t *req = arch_get_register_req_out(irn);
774         copy_opt_t                *co  = env;
775         int pos, max;
776         const arch_register_t *reg;
777
778         if (req->cls != co->cls || arch_irn_is_ignore(irn))
779                 return;
780
781         reg = arch_get_irn_register(irn);
782         if (arch_register_type_is(reg, ignore))
783                 return;
784
785         if (is_Reg_Phi(irn)) { /* Phis */
786                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
787                         ir_node *arg = get_irn_n(irn, pos);
788                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
789                 }
790         } else if (is_Perm_Proj(irn)) { /* Perms */
791                 ir_node *arg = get_Perm_src(irn);
792                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
793         } else { /* 2-address code */
794                 if (is_2addr_code(req)) {
795                         const unsigned other = req->other_same;
796                         int i;
797
798                         for (i = 0; 1U << i <= other; ++i) {
799                                 if (other & (1U << i)) {
800                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
801                                         if (!arch_irn_is_ignore(other))
802                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
803                                 }
804                         }
805                 }
806         }
807 }
808
809 void co_build_graph_structure(copy_opt_t *co) {
810         obstack_init(&co->obst);
811         co->nodes = new_set(compare_affinity_node_t, 32);
812
813         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
814 }
815
816 void co_free_graph_structure(copy_opt_t *co) {
817         ASSERT_GS_AVAIL(co);
818
819         del_set(co->nodes);
820         obstack_free(&co->obst, NULL);
821         co->nodes = NULL;
822 }
823
824 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
825
826 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
827         affinity_node_t new_node, *n;
828
829         ASSERT_GS_AVAIL(co);
830
831         new_node.irn = irn;
832         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
833         if (n) {
834                 return (n->degree > 0);
835         } else
836                 return 0;
837 }
838
839 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
840 {
841         ir_node *nodes[]  = { a, b };
842         bitset_t *constr[] = { NULL, NULL };
843         int j;
844
845         constr[0] = bitset_alloca(co->cls->n_regs);
846         constr[1] = bitset_alloca(co->cls->n_regs);
847
848         for (j = 0; j < 2; ++j) {
849                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
850                 if(arch_register_req_is(req, limited))
851                         rbitset_copy_to_bitset(req->limited, constr[j]);
852                 else
853                         bitset_set_all(constr[j]);
854
855         }
856
857         return !bitset_intersect(constr[0], constr[1]);
858 }
859
860 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
861 {
862         be_ifg_t *ifg       = co->cenv->ifg;
863         int      *color_map = ALLOCAN(int, co->cls->n_regs);
864         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
865
866         ir_node *irn;
867         void *it, *nit;
868         int n, n_regs;
869         unsigned i;
870
871         n_regs = 0;
872         for(i = 0; i < co->cls->n_regs; ++i) {
873                 const arch_register_t *reg = &co->cls->regs[i];
874                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
875         }
876
877         /*
878          * n contains the first node number.
879          * the values below n are the pre-colored register nodes
880          */
881
882         it  = be_ifg_nodes_iter_alloca(ifg);
883         nit = be_ifg_neighbours_iter_alloca(ifg);
884
885         n = n_regs;
886         be_ifg_foreach_node(ifg, it, irn) {
887                 if (arch_irn_is_ignore(irn))
888                         continue;
889                 node_map[get_irn_idx(irn)] = n++;
890         }
891
892         fprintf(f, "%d %d\n", n, n_regs);
893
894         be_ifg_foreach_node(ifg, it, irn) {
895                 if (!arch_irn_is_ignore(irn)) {
896                         int idx                        = node_map[get_irn_idx(irn)];
897                         affinity_node_t           *a   = get_affinity_info(co, irn);
898                         const arch_register_req_t *req = arch_get_register_req_out(irn);
899                         ir_node                   *adj;
900
901                         if(arch_register_req_is(req, limited)) {
902                                 for(i = 0; i < co->cls->n_regs; ++i) {
903                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
904                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
905                                 }
906                         }
907
908                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
909                                 if (!arch_irn_is_ignore(adj) &&
910                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
911                                         int adj_idx = node_map[get_irn_idx(adj)];
912                                         if(idx < adj_idx)
913                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
914                                 }
915                         }
916
917                         if(a) {
918                                 neighb_t *n;
919
920                                 co_gs_foreach_neighb(a, n) {
921                                         if (!arch_irn_is_ignore(n->irn)) {
922                                                 int n_idx = node_map[get_irn_idx(n->irn)];
923                                                 if(idx < n_idx)
924                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
925                                         }
926                                 }
927                         }
928                 }
929         }
930
931         xfree(node_map);
932 }
933
934 /*
935          ___ _____ ____   ____   ___ _____   ____                        _
936         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
937          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
938          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
939         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
940                                                                   |_|            |___/
941 */
942
943 static const char *get_dot_color_name(size_t col)
944 {
945         static const char *names[] = {
946                 "blue",
947                 "red",
948                 "green",
949                 "yellow",
950                 "cyan",
951                 "magenta",
952                 "orange",
953                 "chocolate",
954                 "beige",
955                 "navy",
956                 "darkgreen",
957                 "darkred",
958                 "lightPink",
959                 "chartreuse",
960                 "lightskyblue",
961                 "linen",
962                 "pink",
963                 "lightslateblue",
964                 "mintcream",
965                 "red",
966                 "darkolivegreen",
967                 "mediumblue",
968                 "mistyrose",
969                 "salmon",
970                 "darkseagreen",
971                 "mediumslateblue"
972                 "moccasin",
973                 "tomato",
974                 "forestgreen",
975                 "darkturquoise",
976                 "palevioletred"
977         };
978
979         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
980 }
981
982 typedef struct _co_ifg_dump_t {
983         const copy_opt_t *co;
984         unsigned flags;
985 } co_ifg_dump_t;
986
987 static void ifg_dump_graph_attr(FILE *f, void *self)
988 {
989         (void) self;
990         fprintf(f, "overlap=scale");
991 }
992
993 static int ifg_is_dump_node(void *self, ir_node *irn)
994 {
995         (void)self;
996         return !arch_irn_is_ignore(irn);
997 }
998
999 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1000 {
1001         co_ifg_dump_t             *env     = self;
1002         const arch_register_t     *reg     = arch_get_irn_register(irn);
1003         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1004         int                        limited = arch_register_req_is(req, limited);
1005
1006         if(env->flags & CO_IFG_DUMP_LABELS) {
1007                 ir_fprintf(f, "label=\"%+F", irn);
1008
1009                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1010                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1011                         rbitset_copy_to_bitset(req->limited, bs);
1012                         ir_fprintf(f, "\\n%B", bs);
1013                 }
1014                 ir_fprintf(f, "\" ");
1015         } else {
1016                 fprintf(f, "label=\"\" shape=point " );
1017         }
1018
1019         if(env->flags & CO_IFG_DUMP_SHAPE)
1020                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1021
1022         if(env->flags & CO_IFG_DUMP_COLORS)
1023                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1024 }
1025
1026 static void ifg_dump_at_end(FILE *file, void *self)
1027 {
1028         co_ifg_dump_t *env = self;
1029         affinity_node_t *a;
1030
1031         co_gs_foreach_aff_node(env->co, a) {
1032                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1033                 unsigned aidx = get_irn_idx(a->irn);
1034                 neighb_t *n;
1035
1036                 co_gs_foreach_neighb(a, n) {
1037                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1038                         unsigned nidx = get_irn_idx(n->irn);
1039
1040                         if(aidx < nidx) {
1041                                 const char *color = nr == ar ? "blue" : "red";
1042                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1043                                 if(env->flags & CO_IFG_DUMP_LABELS)
1044                                         fprintf(file, "label=\"%d\" ", n->costs);
1045                                 if(env->flags & CO_IFG_DUMP_COLORS)
1046                                         fprintf(file, "color=%s ", color);
1047                                 else
1048                                         fprintf(file, "style=dotted");
1049                                 fprintf(file, "];\n");
1050                         }
1051                 }
1052         }
1053 }
1054
1055
1056 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1057         ifg_is_dump_node,
1058         ifg_dump_graph_attr,
1059         ifg_dump_node_attr,
1060         NULL,
1061         NULL,
1062         ifg_dump_at_end
1063 };
1064
1065
1066
1067 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1068 {
1069         co_ifg_dump_t cod;
1070
1071         cod.co    = co;
1072         cod.flags = flags;
1073         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1074 }
1075
1076
1077 void co_solve_park_moon(copy_opt_t *opt)
1078 {
1079         (void) opt;
1080 }
1081
1082 static int void_algo(copy_opt_t *co)
1083 {
1084         (void) co;
1085         return 0;
1086 }
1087
1088 /*
1089                 _    _                  _ _   _
1090            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1091           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1092          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1093         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1094                            |___/
1095 */
1096
1097 typedef struct {
1098         co_algo_t  *algo;
1099         const char *name;
1100         int        can_improve_existing;
1101 } co_algo_info_t;
1102
1103 static const co_algo_info_t algos[] = {
1104         { void_algo,               "none",  0 },
1105         { co_solve_heuristic,      "heur1", 0 },
1106         { co_solve_heuristic_new,  "heur2", 0 },
1107         { co_solve_heuristic_mst,  "heur4", 0 },
1108 #ifdef WITH_ILP
1109         { co_solve_ilp2,           "ilp",   1 },
1110 #else
1111         { NULL,                    "ilp",   1 },
1112 #endif
1113 #ifdef FIRM_KAPS
1114         { co_solve_heuristic_pbqp, "pbqp",  0 },
1115 #endif
1116         { NULL,                    "",      0 }
1117 };
1118
1119 /*
1120     __  __       _         ____       _
1121    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1122    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1123    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1124    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1125
1126 */
1127
1128 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1129 {
1130         FILE *result;
1131         char buf[1024];
1132         size_t i, n;
1133         char *tu_name;
1134
1135         n = strlen(env->birg->main_env->cup_name);
1136         tu_name = XMALLOCN(char, n + 1);
1137         strcpy(tu_name, env->birg->main_env->cup_name);
1138         for (i = 0; i < n; ++i)
1139                 if (tu_name[i] == '.')
1140                         tu_name[i] = '_';
1141
1142
1143         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1144         xfree(tu_name);
1145         result = fopen(buf, "wt");
1146         if(result == NULL) {
1147                 panic("Couldn't open '%s' for writing.", buf);
1148         }
1149
1150         return result;
1151 }
1152
1153 void co_driver(be_chordal_env_t *cenv)
1154 {
1155         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1156         co_complete_stats_t before, after;
1157         copy_opt_t          *co;
1158         co_algo_t           *algo_func;
1159         int                 was_optimal = 0;
1160
1161         if (algo >= CO_ALGO_LAST)
1162                 return;
1163
1164         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1165
1166         co = new_copy_opt(cenv, cost_func);
1167         co_build_ou_structure(co);
1168         co_build_graph_structure(co);
1169
1170         co_complete_stats(co, &before);
1171
1172         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1173         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1174         be_stat_ev_ull("co_max_costs",    before.max_costs);
1175         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1176         be_stat_ev_ull("co_aff_int",      before.aff_int);
1177
1178         be_stat_ev_ull("co_init_costs",   before.costs);
1179         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1180
1181         if (dump_flags & DUMP_BEFORE) {
1182                 FILE *f = my_open(cenv, "", "-before.dot");
1183                 co_dump_ifg_dot(co, f, style_flags);
1184                 fclose(f);
1185         }
1186
1187         /* if the algo can improve results, provide an initial solution with heur1 */
1188         if (improve && algos[algo].can_improve_existing) {
1189                 co_complete_stats_t stats;
1190
1191                 /* produce a heuristic solution */
1192                 co_solve_heuristic(co);
1193
1194                 /* do the stats and provide the current costs */
1195                 co_complete_stats(co, &stats);
1196                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1197         }
1198
1199         algo_func = algos[algo].algo;
1200
1201         /* perform actual copy minimization */
1202         ir_timer_reset_and_start(timer);
1203         was_optimal = algo_func(co);
1204         ir_timer_stop(timer);
1205
1206         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1207         be_stat_ev_ull("co_optimal", was_optimal);
1208
1209         if (dump_flags & DUMP_AFTER) {
1210                 FILE *f = my_open(cenv, "", "-after.dot");
1211                 co_dump_ifg_dot(co, f, style_flags);
1212                 fclose(f);
1213         }
1214
1215         co_complete_stats(co, &after);
1216
1217         if (do_stats) {
1218                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1219                 ulong64 evitable          = after.costs     - after.inevit_costs;
1220
1221                 ir_printf("%30F ", cenv->irg);
1222                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1223
1224                 if(optimizable_costs > 0)
1225                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1226                 else
1227                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1228         }
1229
1230         /* Dump the interference graph in Appel's format. */
1231         if (dump_flags & DUMP_APPEL) {
1232                 FILE *f = my_open(cenv, "", ".apl");
1233                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1234                 co_dump_appel_graph(co, f);
1235                 fclose(f);
1236         }
1237
1238         be_stat_ev_ull("co_after_costs", after.costs);
1239         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1240
1241         co_free_graph_structure(co);
1242         co_free_ou_structure(co);
1243         free_copy_opt(co);
1244 }