change register allocator and related interfaces to use ir_graph* instead of be_irg_t*
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode.h"
52 #include "beutil.h"
53 #include "beifg.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static int        improve     = 1;
81
82 static const lc_opt_enum_mask_items_t dump_items[] = {
83         { "before",  DUMP_BEFORE },
84         { "after",   DUMP_AFTER  },
85         { "appel",   DUMP_APPEL  },
86         { "all",     DUMP_ALL    },
87         { NULL,      0 }
88 };
89
90 static const lc_opt_enum_mask_items_t style_items[] = {
91         { "color",   CO_IFG_DUMP_COLORS },
92         { "labels",  CO_IFG_DUMP_LABELS },
93         { "constr",  CO_IFG_DUMP_CONSTR },
94         { "shape",   CO_IFG_DUMP_SHAPE  },
95         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
96         { NULL,      0 }
97 };
98
99 typedef int (*opt_funcptr)(void);
100
101 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
102         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
103         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
104         { "one",    (opt_funcptr) co_get_costs_all_one },
105         { NULL,     NULL }
106 };
107
108 static lc_opt_enum_mask_var_t dump_var = {
109         &dump_flags, dump_items
110 };
111
112 static lc_opt_enum_mask_var_t style_var = {
113         &style_flags, style_items
114 };
115
116 static lc_opt_enum_func_ptr_var_t cost_func_var = {
117         (opt_funcptr*) &cost_func, cost_func_items
118 };
119
120 static const lc_opt_table_entry_t options[] = {
121         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
122         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
123         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
124         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
125         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
126         LC_OPT_LAST
127 };
128
129 static be_module_list_entry_t *copyopts = NULL;
130 static const co_algo_info *selected_copyopt = NULL;
131
132 void be_register_copyopt(const char *name, co_algo_info *copyopt)
133 {
134         if (selected_copyopt == NULL)
135                 selected_copyopt = copyopt;
136         be_add_module_to_list(&copyopts, name, copyopt);
137 }
138
139 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt);
140 void be_init_copyopt(void)
141 {
142         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
143         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
144         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
145         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
146
147         lc_opt_add_table(co_grp, options);
148         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
149                                        &copyopts, (void**) &selected_copyopt);
150 }
151
152 static int void_algo(copy_opt_t *co)
153 {
154         (void) co;
155         return 0;
156 }
157
158 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone);
159 void be_init_copynone(void)
160 {
161         static co_algo_info copyheur = {
162                 void_algo, 0
163         };
164
165         be_register_copyopt("none", &copyheur);
166 }
167
168 #undef QUICK_AND_DIRTY_HACK
169
170 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
171 {
172         if (env->ifg)
173                 return be_ifg_connected(env->ifg, a, b);
174         else {
175                 be_lv_t *lv = be_get_irg_liveness(env->irg);
176                 return be_values_interfere(lv, a, b);
177         }
178 }
179
180
181 /******************************************************************************
182     _____                           _
183    / ____|                         | |
184   | |  __  ___ _ __   ___ _ __ __ _| |
185   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
186   | |__| |  __/ | | |  __/ | | (_| | |
187    \_____|\___|_| |_|\___|_|  \__,_|_|
188
189  ******************************************************************************/
190
191 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
192
193
194 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
195 {
196         const char *s1, *s2, *s3;
197         int len;
198         copy_opt_t *co;
199
200         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
201
202         co = XMALLOCZ(copy_opt_t);
203         co->cenv      = chordal_env;
204         co->irg       = chordal_env->irg;
205         co->cls       = chordal_env->cls;
206         co->get_costs = get_costs;
207
208         s1 = get_irp_name();
209         s2 = get_entity_name(get_irg_entity(co->irg));
210         s3 = chordal_env->cls->name;
211         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
212         co->name = XMALLOCN(char, len);
213         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
214
215         return co;
216 }
217
218 void free_copy_opt(copy_opt_t *co)
219 {
220         xfree(co->name);
221         free(co);
222 }
223
224 /**
225  * Checks if a node is optimizable, viz. has something to do with coalescing
226  * @param irn  The irn to check
227  */
228 static int co_is_optimizable_root(ir_node *irn)
229 {
230         const arch_register_req_t *req;
231         const arch_register_t     *reg;
232
233         if (arch_irn_is_ignore(irn))
234                 return 0;
235
236         reg = arch_get_irn_register(irn);
237         if (arch_register_type_is(reg, ignore))
238                 return 0;
239
240         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
241                 return 1;
242
243         req = arch_get_register_req_out(irn);
244         if (is_2addr_code(req))
245                 return 1;
246
247         return 0;
248 }
249
250 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
251 {
252         int cost = 0;
253         ir_loop *loop;
254         ir_node *root_block = get_nodes_block(root);
255         (void) co;
256         (void) arg;
257
258         if (is_Phi(root)) {
259                 /* for phis the copies are placed in the corresponding pred-block */
260                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
261         } else {
262                 /* a perm places the copy in the same block as it resides */
263                 loop = get_irn_loop(root_block);
264         }
265         if (loop) {
266                 int d = get_loop_depth(loop);
267                 cost = d*d;
268         }
269         return 1+cost;
270 }
271
272 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
273 {
274         int res;
275         ir_node *root_bl = get_nodes_block(root);
276         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
277         ir_exec_freq *exec_freq = be_get_irg_exec_freq(co->cenv->irg);
278         (void) arg;
279         res = get_block_execfreq_ulong(exec_freq, copy_bl);
280
281         /* don't allow values smaller than one. */
282         return res < 1 ? 1 : res;
283 }
284
285
286 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos)
287 {
288         (void) co;
289         (void) root;
290         (void) arg;
291         (void) pos;
292         return 1;
293 }
294
295 /******************************************************************************
296    ____        _   _    _       _ _          _____ _
297   / __ \      | | | |  | |     (_) |        / ____| |
298  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
299  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
300  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
301   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
302         | |                                                            __/ |
303         |_|                                                           |___/
304  ******************************************************************************/
305
306 /**
307  * Determines a maximum weighted independent set with respect to
308  * the interference and conflict edges of all nodes in a qnode.
309  */
310 static int ou_max_ind_set_costs(unit_t *ou)
311 {
312         be_chordal_env_t *chordal_env = ou->co->cenv;
313         ir_node **safe, **unsafe;
314         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
315         bitset_t *curr;
316         unsigned  pos;
317         int curr_weight, best_weight = 0;
318
319         /* assign the nodes into two groups.
320          * safe: node has no interference, hence it is in every max stable set.
321          * unsafe: node has an interference
322          */
323         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
324         safe_costs   = 0;
325         safe_count   = 0;
326         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
327         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
328         unsafe_count = 0;
329         for (i=1; i<ou->node_count; ++i) {
330                 int is_safe = 1;
331                 for (o=1; o<ou->node_count; ++o) {
332                         if (i==o)
333                                 continue;
334                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
335                                 unsafe_costs[unsafe_count] = ou->costs[i];
336                                 unsafe[unsafe_count] = ou->nodes[i];
337                                 ++unsafe_count;
338                                 is_safe = 0;
339                                 break;
340                         }
341                 }
342                 if (is_safe) {
343                         safe_costs += ou->costs[i];
344                         safe[safe_count++] = ou->nodes[i];
345                 }
346         }
347
348
349         /* now compute the best set out of the unsafe nodes*/
350         if (unsafe_count > MIS_HEUR_TRIGGER) {
351                 bitset_t *best = bitset_alloca(unsafe_count);
352                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
353                 for (i=0; i<unsafe_count; ++i) {
354                         bitset_set(best, i);
355                         /* check if it is a stable set */
356                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
357                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
358                                         bitset_clear(best, i); /* clear the bit and try next one */
359                                         break;
360                                 }
361                 }
362                 /* compute the weight */
363                 bitset_foreach(best, pos)
364                         best_weight += unsafe_costs[pos];
365         } else {
366                 /* Exact Algorithm: Brute force */
367                 curr = bitset_alloca(unsafe_count);
368                 bitset_set_all(curr);
369                 while (bitset_popcount(curr) != 0) {
370                         /* check if curr is a stable set */
371                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
372                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
373                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
374                                                         goto no_stable_set;
375
376                         /* if we arrive here, we have a stable set */
377                         /* compute the weigth of the stable set*/
378                         curr_weight = 0;
379                         bitset_foreach(curr, pos)
380                                 curr_weight += unsafe_costs[pos];
381
382                         /* any better ? */
383                         if (curr_weight > best_weight) {
384                                 best_weight = curr_weight;
385                         }
386
387         no_stable_set:
388                         bitset_minus1(curr);
389                 }
390         }
391
392         return safe_costs+best_weight;
393 }
394
395 static void co_collect_units(ir_node *irn, void *env)
396 {
397         const arch_register_req_t *req;
398         copy_opt_t                *co  = env;
399         unit_t *unit;
400
401         if (get_irn_mode(irn) == mode_T)
402                 return;
403         req = arch_get_register_req_out(irn);
404         if (req->cls != co->cls)
405                 return;
406         if (!co_is_optimizable_root(irn))
407                 return;
408
409         /* Init a new unit */
410         unit = XMALLOCZ(unit_t);
411         unit->co = co;
412         unit->node_count = 1;
413         INIT_LIST_HEAD(&unit->queue);
414
415         /* Phi with some/all of its arguments */
416         if (is_Reg_Phi(irn)) {
417                 int i, arity;
418
419                 /* init */
420                 arity = get_irn_arity(irn);
421                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
422                 unit->costs = XMALLOCN(int,      arity + 1);
423                 unit->nodes[0] = irn;
424
425                 /* fill */
426                 for (i=0; i<arity; ++i) {
427                         int o, arg_pos;
428                         ir_node *arg = get_irn_n(irn, i);
429
430                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
431                         if (arg == irn)
432                                 continue;
433                         if (nodes_interfere(co->cenv, irn, arg)) {
434                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
435                                 continue;
436                         }
437
438                         /* Else insert the argument of the phi to the members of this ou */
439                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
440
441                         if (arch_irn_is_ignore(arg))
442                                 continue;
443
444                         /* Check if arg has occurred at a prior position in the arg/list */
445                         arg_pos = 0;
446                         for (o=1; o<unit->node_count; ++o) {
447                                 if (unit->nodes[o] == arg) {
448                                         arg_pos = o;
449                                         break;
450                                 }
451                         }
452
453                         if (!arg_pos) { /* a new argument */
454                                 /* insert node, set costs */
455                                 unit->nodes[unit->node_count] = arg;
456                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
457                                 unit->node_count++;
458                         } else { /* arg has occurred before in same phi */
459                                 /* increase costs for existing arg */
460                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
461                         }
462                 }
463                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
464                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
465         } else if (is_Perm_Proj(irn)) {
466                 /* Proj of a perm with corresponding arg */
467                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
468                 unit->nodes = XMALLOCN(ir_node*, 2);
469                 unit->costs = XMALLOCN(int,      2);
470                 unit->node_count = 2;
471                 unit->nodes[0] = irn;
472                 unit->nodes[1] = get_Perm_src(irn);
473                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
474         } else {
475                 /* Src == Tgt of a 2-addr-code instruction */
476                 if (is_2addr_code(req)) {
477                         const unsigned other = req->other_same;
478                         int            count = 0;
479                         int            i;
480
481                         for (i = 0; (1U << i) <= other; ++i) {
482                                 if (other & (1U << i)) {
483                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
484                                         if (arch_irn_is_ignore(o))
485                                                 continue;
486                                         if (nodes_interfere(co->cenv, irn, o))
487                                                 continue;
488                                         ++count;
489                                 }
490                         }
491
492                         if (count != 0) {
493                                 int k = 0;
494                                 ++count;
495                                 unit->nodes = XMALLOCN(ir_node*, count);
496                                 unit->costs = XMALLOCN(int,      count);
497                                 unit->node_count = count;
498                                 unit->nodes[k++] = irn;
499
500                                 for (i = 0; 1U << i <= other; ++i) {
501                                         if (other & (1U << i)) {
502                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
503                                                 if (!arch_irn_is_ignore(o) &&
504                                                                 !nodes_interfere(co->cenv, irn, o)) {
505                                                         unit->nodes[k] = o;
506                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
507                                                         ++k;
508                                                 }
509                                         }
510                                 }
511                         }
512                 } else {
513                         assert(0 && "This is not an optimizable node!");
514                 }
515         }
516
517         /* Insert the new unit at a position according to its costs */
518         if (unit->node_count > 1) {
519                 int i;
520                 struct list_head *tmp;
521
522                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
523                 for (i=1; i<unit->node_count; ++i) {
524                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
525                         unit->all_nodes_costs += unit->costs[i];
526                 }
527
528                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
529                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
530                 /* Insert the new ou according to its sort_key */
531                 tmp = &co->units;
532                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
533                         tmp = tmp->next;
534                 list_add(&unit->units, tmp);
535         } else {
536                 free(unit);
537         }
538 }
539
540 #ifdef QUICK_AND_DIRTY_HACK
541
542 static int compare_ous(const void *k1, const void *k2)
543 {
544         const unit_t *u1 = *((const unit_t **) k1);
545         const unit_t *u2 = *((const unit_t **) k2);
546         int i, o, u1_has_constr, u2_has_constr;
547         arch_register_req_t req;
548
549         /* Units with constraints come first */
550         u1_has_constr = 0;
551         for (i=0; i<u1->node_count; ++i) {
552                 arch_get_register_req_out(&req, u1->nodes[i]);
553                 if (arch_register_req_is(&req, limited)) {
554                         u1_has_constr = 1;
555                         break;
556                 }
557         }
558
559         u2_has_constr = 0;
560         for (i=0; i<u2->node_count; ++i) {
561                 arch_get_register_req_out(&req, u2->nodes[i]);
562                 if (arch_register_req_is(&req, limited)) {
563                         u2_has_constr = 1;
564                         break;
565                 }
566         }
567
568         if (u1_has_constr != u2_has_constr)
569                 return u2_has_constr - u1_has_constr;
570
571         /* Now check, whether the two units are connected */
572 #if 0
573         for (i=0; i<u1->node_count; ++i)
574                 for (o=0; o<u2->node_count; ++o)
575                         if (u1->nodes[i] == u2->nodes[o])
576                                 return 0;
577 #endif
578
579         /* After all, the sort key decides. Greater keys come first. */
580         return u2->sort_key - u1->sort_key;
581
582 }
583
584 /**
585  * Sort the ou's according to constraints and their sort_key
586  */
587 static void co_sort_units(copy_opt_t *co)
588 {
589         int i, count = 0, costs;
590         unit_t *ou, **ous;
591
592         /* get the number of ous, remove them form the list and fill the array */
593         list_for_each_entry(unit_t, ou, &co->units, units)
594                 count++;
595         ous = ALLOCAN(unit_t, count);
596
597         costs = co_get_max_copy_costs(co);
598
599         i = 0;
600         list_for_each_entry(unit_t, ou, &co->units, units)
601                 ous[i++] = ou;
602
603         INIT_LIST_HEAD(&co->units);
604
605         assert(count == i && list_empty(&co->units));
606
607         for (i=0; i<count; ++i)
608                 ir_printf("%+F\n", ous[i]->nodes[0]);
609
610         qsort(ous, count, sizeof(*ous), compare_ous);
611
612         ir_printf("\n\n");
613         for (i=0; i<count; ++i)
614                 ir_printf("%+F\n", ous[i]->nodes[0]);
615
616         /* reinsert into list in correct order */
617         for (i=0; i<count; ++i)
618                 list_add_tail(&ous[i]->units, &co->units);
619
620         assert(costs == co_get_max_copy_costs(co));
621 }
622 #endif
623
624 void co_build_ou_structure(copy_opt_t *co)
625 {
626         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
627         INIT_LIST_HEAD(&co->units);
628         irg_walk_graph(co->irg, co_collect_units, NULL, co);
629 #ifdef QUICK_AND_DIRTY_HACK
630         co_sort_units(co);
631 #endif
632 }
633
634 void co_free_ou_structure(copy_opt_t *co)
635 {
636         unit_t *curr, *tmp;
637         ASSERT_OU_AVAIL(co);
638         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
639                 xfree(curr->nodes);
640                 xfree(curr->costs);
641                 xfree(curr);
642         }
643         co->units.next = NULL;
644 }
645
646 /* co_solve_heuristic() is implemented in becopyheur.c */
647
648 int co_get_max_copy_costs(const copy_opt_t *co)
649 {
650         int i, res = 0;
651         unit_t *curr;
652
653         ASSERT_OU_AVAIL(co);
654
655         list_for_each_entry(unit_t, curr, &co->units, units) {
656                 res += curr->inevitable_costs;
657                 for (i=1; i<curr->node_count; ++i)
658                         res += curr->costs[i];
659         }
660         return res;
661 }
662
663 int co_get_inevit_copy_costs(const copy_opt_t *co)
664 {
665         int res = 0;
666         unit_t *curr;
667
668         ASSERT_OU_AVAIL(co);
669
670         list_for_each_entry(unit_t, curr, &co->units, units)
671                 res += curr->inevitable_costs;
672         return res;
673 }
674
675 int co_get_copy_costs(const copy_opt_t *co)
676 {
677         int i, res = 0;
678         unit_t *curr;
679
680         ASSERT_OU_AVAIL(co);
681
682         list_for_each_entry(unit_t, curr, &co->units, units) {
683                 int root_col = get_irn_col(curr->nodes[0]);
684                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
685                 res += curr->inevitable_costs;
686                 for (i=1; i<curr->node_count; ++i) {
687                         int arg_col = get_irn_col(curr->nodes[i]);
688                         if (root_col != arg_col) {
689                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
690                                 res += curr->costs[i];
691                         }
692                 }
693         }
694         return res;
695 }
696
697 int co_get_lower_bound(const copy_opt_t *co)
698 {
699         int res = 0;
700         unit_t *curr;
701
702         ASSERT_OU_AVAIL(co);
703
704         list_for_each_entry(unit_t, curr, &co->units, units)
705                 res += curr->inevitable_costs + curr->min_nodes_costs;
706         return res;
707 }
708
709 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
710 {
711         bitset_t *seen = bitset_irg_malloc(co->irg);
712         affinity_node_t *an;
713
714         memset(stat, 0, sizeof(stat[0]));
715
716         /* count affinity edges. */
717         co_gs_foreach_aff_node(co, an) {
718                 neighb_t *neigh;
719                 stat->aff_nodes += 1;
720                 bitset_add_irn(seen, an->irn);
721                 co_gs_foreach_neighb(an, neigh) {
722                         if (!bitset_contains_irn(seen, neigh->irn)) {
723                                 stat->aff_edges += 1;
724                                 stat->max_costs += neigh->costs;
725
726                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
727                                         stat->costs += neigh->costs;
728                                         stat->unsatisfied_edges += 1;
729                                 }
730
731                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
732                                         stat->aff_int += 1;
733                                         stat->inevit_costs += neigh->costs;
734                                 }
735
736                         }
737                 }
738         }
739
740         bitset_free(seen);
741 }
742
743 /******************************************************************************
744    _____                 _        _____ _
745   / ____|               | |      / ____| |
746  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
747  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
748  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
749   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
750                   | |                                       __/ |
751                   |_|                                      |___/
752  ******************************************************************************/
753
754 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
755 {
756         const affinity_node_t *n1 = k1;
757         const affinity_node_t *n2 = k2;
758         (void) size;
759
760         return (n1->irn != n2->irn);
761 }
762
763 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
764 {
765         affinity_node_t new_node, *node;
766         neighb_t        *nbr;
767         int             allocnew = 1;
768
769         new_node.irn        = n1;
770         new_node.degree     = 0;
771         new_node.neighbours = NULL;
772         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
773
774         for (nbr = node->neighbours; nbr; nbr = nbr->next)
775                 if (nbr->irn == n2) {
776                         allocnew = 0;
777                         break;
778                 }
779
780         /* if we did not find n2 in n1's neighbourhood insert it */
781         if (allocnew) {
782                 nbr        = OALLOC(&co->obst, neighb_t);
783                 nbr->irn   = n2;
784                 nbr->costs = 0;
785                 nbr->next  = node->neighbours;
786
787                 node->neighbours = nbr;
788                 node->degree++;
789         }
790
791         /* now nbr points to n1's neighbour-entry of n2 */
792         nbr->costs += costs;
793 }
794
795 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
796 {
797         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
798                 add_edge(co, n1, n2, costs);
799                 add_edge(co, n2, n1, costs);
800         }
801 }
802
803 static void build_graph_walker(ir_node *irn, void *env)
804 {
805         const arch_register_req_t *req;
806         copy_opt_t                *co  = env;
807         int pos, max;
808         const arch_register_t *reg;
809
810         if (get_irn_mode(irn) == mode_T)
811                 return;
812         req = arch_get_register_req_out(irn);
813         if (req->cls != co->cls || arch_irn_is_ignore(irn))
814                 return;
815
816         reg = arch_get_irn_register(irn);
817         if (arch_register_type_is(reg, ignore))
818                 return;
819
820         if (is_Reg_Phi(irn)) { /* Phis */
821                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
822                         ir_node *arg = get_irn_n(irn, pos);
823                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
824                 }
825         } else if (is_Perm_Proj(irn)) { /* Perms */
826                 ir_node *arg = get_Perm_src(irn);
827                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
828         } else { /* 2-address code */
829                 if (is_2addr_code(req)) {
830                         const unsigned other = req->other_same;
831                         int i;
832
833                         for (i = 0; 1U << i <= other; ++i) {
834                                 if (other & (1U << i)) {
835                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
836                                         if (!arch_irn_is_ignore(other))
837                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
838                                 }
839                         }
840                 }
841         }
842 }
843
844 void co_build_graph_structure(copy_opt_t *co)
845 {
846         obstack_init(&co->obst);
847         co->nodes = new_set(compare_affinity_node_t, 32);
848
849         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
850 }
851
852 void co_free_graph_structure(copy_opt_t *co)
853 {
854         ASSERT_GS_AVAIL(co);
855
856         del_set(co->nodes);
857         obstack_free(&co->obst, NULL);
858         co->nodes = NULL;
859 }
860
861 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
862
863 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
864 {
865         affinity_node_t new_node, *n;
866
867         ASSERT_GS_AVAIL(co);
868
869         new_node.irn = irn;
870         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
871         if (n) {
872                 return (n->degree > 0);
873         } else
874                 return 0;
875 }
876
877 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
878 {
879         ir_node *nodes[]  = { a, b };
880         bitset_t *constr[] = { NULL, NULL };
881         int j;
882
883         constr[0] = bitset_alloca(co->cls->n_regs);
884         constr[1] = bitset_alloca(co->cls->n_regs);
885
886         for (j = 0; j < 2; ++j) {
887                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
888                 if (arch_register_req_is(req, limited))
889                         rbitset_copy_to_bitset(req->limited, constr[j]);
890                 else
891                         bitset_set_all(constr[j]);
892
893         }
894
895         return !bitset_intersect(constr[0], constr[1]);
896 }
897
898 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
899 {
900         be_ifg_t *ifg       = co->cenv->ifg;
901         int      *color_map = ALLOCAN(int, co->cls->n_regs);
902         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
903
904         ir_node *irn;
905         nodes_iter_t it;
906         neighbours_iter_t nit;
907         int n, n_regs;
908         unsigned i;
909
910         n_regs = 0;
911         for (i = 0; i < co->cls->n_regs; ++i) {
912                 const arch_register_t *reg = &co->cls->regs[i];
913                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
914         }
915
916         /*
917          * n contains the first node number.
918          * the values below n are the pre-colored register nodes
919          */
920
921         n = n_regs;
922         be_ifg_foreach_node(ifg, &it, irn) {
923                 if (arch_irn_is_ignore(irn))
924                         continue;
925                 node_map[get_irn_idx(irn)] = n++;
926         }
927
928         fprintf(f, "%d %d\n", n, n_regs);
929
930         be_ifg_foreach_node(ifg, &it, irn) {
931                 if (!arch_irn_is_ignore(irn)) {
932                         int idx                        = node_map[get_irn_idx(irn)];
933                         affinity_node_t           *a   = get_affinity_info(co, irn);
934                         const arch_register_req_t *req = arch_get_register_req_out(irn);
935                         ir_node                   *adj;
936
937                         if (arch_register_req_is(req, limited)) {
938                                 for (i = 0; i < co->cls->n_regs; ++i) {
939                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
940                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
941                                 }
942                         }
943
944                         be_ifg_foreach_neighbour(ifg, &nit, irn, adj) {
945                                 if (!arch_irn_is_ignore(adj) &&
946                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
947                                         int adj_idx = node_map[get_irn_idx(adj)];
948                                         if (idx < adj_idx)
949                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
950                                 }
951                         }
952
953                         if (a) {
954                                 neighb_t *n;
955
956                                 co_gs_foreach_neighb(a, n) {
957                                         if (!arch_irn_is_ignore(n->irn)) {
958                                                 int n_idx = node_map[get_irn_idx(n->irn)];
959                                                 if (idx < n_idx)
960                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
961                                         }
962                                 }
963                         }
964                 }
965         }
966
967         xfree(node_map);
968 }
969
970 /*
971          ___ _____ ____   ____   ___ _____   ____                        _
972         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
973          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
974          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
975         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
976                                                                   |_|            |___/
977 */
978
979 static const char *get_dot_color_name(size_t col)
980 {
981         static const char *names[] = {
982                 "blue",
983                 "red",
984                 "green",
985                 "yellow",
986                 "cyan",
987                 "magenta",
988                 "orange",
989                 "chocolate",
990                 "beige",
991                 "navy",
992                 "darkgreen",
993                 "darkred",
994                 "lightPink",
995                 "chartreuse",
996                 "lightskyblue",
997                 "linen",
998                 "pink",
999                 "lightslateblue",
1000                 "mintcream",
1001                 "red",
1002                 "darkolivegreen",
1003                 "mediumblue",
1004                 "mistyrose",
1005                 "salmon",
1006                 "darkseagreen",
1007                 "mediumslateblue"
1008                 "moccasin",
1009                 "tomato",
1010                 "forestgreen",
1011                 "darkturquoise",
1012                 "palevioletred"
1013         };
1014
1015         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1016 }
1017
1018 typedef struct _co_ifg_dump_t {
1019         const copy_opt_t *co;
1020         unsigned flags;
1021 } co_ifg_dump_t;
1022
1023 static void ifg_dump_graph_attr(FILE *f, void *self)
1024 {
1025         (void) self;
1026         fprintf(f, "overlap=scale");
1027 }
1028
1029 static int ifg_is_dump_node(void *self, ir_node *irn)
1030 {
1031         (void)self;
1032         return !arch_irn_is_ignore(irn);
1033 }
1034
1035 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1036 {
1037         co_ifg_dump_t             *env     = self;
1038         const arch_register_t     *reg     = arch_get_irn_register(irn);
1039         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1040         int                        limited = arch_register_req_is(req, limited);
1041
1042         if (env->flags & CO_IFG_DUMP_LABELS) {
1043                 ir_fprintf(f, "label=\"%+F", irn);
1044
1045                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1046                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1047                         rbitset_copy_to_bitset(req->limited, bs);
1048                         ir_fprintf(f, "\\n%B", bs);
1049                 }
1050                 ir_fprintf(f, "\" ");
1051         } else {
1052                 fprintf(f, "label=\"\" shape=point " );
1053         }
1054
1055         if (env->flags & CO_IFG_DUMP_SHAPE)
1056                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1057
1058         if (env->flags & CO_IFG_DUMP_COLORS)
1059                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1060 }
1061
1062 static void ifg_dump_at_end(FILE *file, void *self)
1063 {
1064         co_ifg_dump_t *env = self;
1065         affinity_node_t *a;
1066
1067         co_gs_foreach_aff_node(env->co, a) {
1068                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1069                 unsigned aidx = get_irn_idx(a->irn);
1070                 neighb_t *n;
1071
1072                 co_gs_foreach_neighb(a, n) {
1073                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1074                         unsigned nidx = get_irn_idx(n->irn);
1075
1076                         if (aidx < nidx) {
1077                                 const char *color = nr == ar ? "blue" : "red";
1078                                 fprintf(file, "\tn%u -- n%u [weight=0.01 ", aidx, nidx);
1079                                 if (env->flags & CO_IFG_DUMP_LABELS)
1080                                         fprintf(file, "label=\"%d\" ", n->costs);
1081                                 if (env->flags & CO_IFG_DUMP_COLORS)
1082                                         fprintf(file, "color=%s ", color);
1083                                 else
1084                                         fprintf(file, "style=dotted");
1085                                 fprintf(file, "];\n");
1086                         }
1087                 }
1088         }
1089 }
1090
1091
1092 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1093         ifg_is_dump_node,
1094         ifg_dump_graph_attr,
1095         ifg_dump_node_attr,
1096         NULL,
1097         NULL,
1098         ifg_dump_at_end
1099 };
1100
1101
1102
1103 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1104 {
1105         co_ifg_dump_t cod;
1106
1107         cod.co    = co;
1108         cod.flags = flags;
1109         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1110 }
1111
1112
1113 void co_solve_park_moon(copy_opt_t *opt)
1114 {
1115         (void) opt;
1116 }
1117
1118 /*
1119     __  __       _         ____       _
1120    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1121    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1122    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1123    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1124
1125 */
1126
1127 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1128 {
1129         FILE *result;
1130         char buf[1024];
1131         size_t i, n;
1132         char *tu_name;
1133         const char *cup_name = be_birg_from_irg(env->irg)->main_env->cup_name;
1134
1135         n = strlen(cup_name);
1136         tu_name = XMALLOCN(char, n + 1);
1137         strcpy(tu_name, cup_name);
1138         for (i = 0; i < n; ++i)
1139                 if (tu_name[i] == '.')
1140                         tu_name[i] = '_';
1141
1142
1143         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1144         xfree(tu_name);
1145         result = fopen(buf, "wt");
1146         if (result == NULL) {
1147                 panic("Couldn't open '%s' for writing.", buf);
1148         }
1149
1150         return result;
1151 }
1152
1153 void co_driver(be_chordal_env_t *cenv)
1154 {
1155         ir_timer_t          *timer = ir_timer_new();
1156         co_complete_stats_t before, after;
1157         copy_opt_t          *co;
1158         int                 was_optimal = 0;
1159
1160         assert(selected_copyopt);
1161
1162         /* skip copymin if algo is 'none' */
1163         if (selected_copyopt->copyopt == void_algo)
1164                 return;
1165
1166         be_liveness_assure_chk(be_get_irg_liveness(cenv->irg));
1167
1168         co = new_copy_opt(cenv, cost_func);
1169         co_build_ou_structure(co);
1170         co_build_graph_structure(co);
1171
1172         co_complete_stats(co, &before);
1173
1174         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1175         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1176         be_stat_ev_ull("co_max_costs",    before.max_costs);
1177         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1178         be_stat_ev_ull("co_aff_int",      before.aff_int);
1179
1180         be_stat_ev_ull("co_init_costs",   before.costs);
1181         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1182
1183         if (dump_flags & DUMP_BEFORE) {
1184                 FILE *f = my_open(cenv, "", "-before.dot");
1185                 co_dump_ifg_dot(co, f, style_flags);
1186                 fclose(f);
1187         }
1188
1189         /* if the algo can improve results, provide an initial solution with heur1 */
1190         if (improve && selected_copyopt->can_improve_existing) {
1191                 co_complete_stats_t stats;
1192
1193                 /* produce a heuristic solution */
1194                 co_solve_heuristic(co);
1195
1196                 /* do the stats and provide the current costs */
1197                 co_complete_stats(co, &stats);
1198                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1199         }
1200
1201         /* perform actual copy minimization */
1202         ir_timer_reset_and_start(timer);
1203         was_optimal = selected_copyopt->copyopt(co);
1204         ir_timer_stop(timer);
1205
1206         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1207         be_stat_ev_ull("co_optimal", was_optimal);
1208         ir_timer_free(timer);
1209
1210         if (dump_flags & DUMP_AFTER) {
1211                 FILE *f = my_open(cenv, "", "-after.dot");
1212                 co_dump_ifg_dot(co, f, style_flags);
1213                 fclose(f);
1214         }
1215
1216         co_complete_stats(co, &after);
1217
1218         if (do_stats) {
1219                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1220                 ulong64 evitable          = after.costs     - after.inevit_costs;
1221
1222                 ir_printf("%30F ", cenv->irg);
1223                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1224
1225                 if (optimizable_costs > 0)
1226                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1227                 else
1228                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1229         }
1230
1231         /* Dump the interference graph in Appel's format. */
1232         if (dump_flags & DUMP_APPEL) {
1233                 FILE *f = my_open(cenv, "", ".apl");
1234                 fprintf(f, "# %llu %llu\n", after.costs, after.unsatisfied_edges);
1235                 co_dump_appel_graph(co, f);
1236                 fclose(f);
1237         }
1238
1239         be_stat_ev_ull("co_after_costs", after.costs);
1240         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1241
1242         co_free_graph_structure(co);
1243         co_free_ou_structure(co);
1244         free_copy_opt(co);
1245 }