differntiate entity kinds
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  *
26  * Main file for the optimization reducing the copies needed for:
27  * - Phi coalescing
28  * - Register-constrained nodes
29  * - Two-address code instructions
30  */
31 #include "config.h"
32
33 #include "execfreq.h"
34 #include "xmalloc.h"
35 #include "debug.h"
36 #include "pmap.h"
37 #include "raw_bitset.h"
38 #include "irnode.h"
39 #include "irgraph.h"
40 #include "irgwalk.h"
41 #include "irprog.h"
42 #include "irloop_t.h"
43 #include "iredges_t.h"
44 #include "irprintf_t.h"
45 #include "irtools.h"
46 #include "util.h"
47
48 #include "bemodule.h"
49 #include "bearch.h"
50 #include "benode.h"
51 #include "beutil.h"
52 #include "beifg.h"
53 #include "beintlive_t.h"
54 #include "becopyopt_t.h"
55 #include "becopystat.h"
56 #include "belive_t.h"
57 #include "beinsn_t.h"
58 #include "besched.h"
59 #include "bestatevent.h"
60 #include "beirg.h"
61 #include "error.h"
62
63 #include "lc_opts.h"
64 #include "lc_opts_enum.h"
65
66 #define DUMP_BEFORE 1
67 #define DUMP_AFTER  2
68 #define DUMP_APPEL  4
69 #define DUMP_ALL    2 * DUMP_APPEL - 1
70
71 #define COST_FUNC_FREQ     1
72 #define COST_FUNC_LOOP     2
73 #define COST_FUNC_ALL_ONE  3
74
75 static unsigned   dump_flags  = 0;
76 static unsigned   style_flags = 0;
77 static int        do_stats    = 0;
78 static cost_fct_t cost_func   = co_get_costs_exec_freq;
79 static int        improve     = 1;
80
81 static const lc_opt_enum_mask_items_t dump_items[] = {
82         { "before",  DUMP_BEFORE },
83         { "after",   DUMP_AFTER  },
84         { "appel",   DUMP_APPEL  },
85         { "all",     DUMP_ALL    },
86         { NULL,      0 }
87 };
88
89 static const lc_opt_enum_mask_items_t style_items[] = {
90         { "color",   CO_IFG_DUMP_COLORS },
91         { "labels",  CO_IFG_DUMP_LABELS },
92         { "constr",  CO_IFG_DUMP_CONSTR },
93         { "shape",   CO_IFG_DUMP_SHAPE  },
94         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
95         { NULL,      0 }
96 };
97
98 typedef int (*opt_funcptr)(void);
99
100 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
101         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
102         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
103         { "one",    (opt_funcptr) co_get_costs_all_one },
104         { NULL,     NULL }
105 };
106
107 static lc_opt_enum_mask_var_t dump_var = {
108         &dump_flags, dump_items
109 };
110
111 static lc_opt_enum_mask_var_t style_var = {
112         &style_flags, style_items
113 };
114
115 static lc_opt_enum_func_ptr_var_t cost_func_var = {
116         (opt_funcptr*) &cost_func, cost_func_items
117 };
118
119 static const lc_opt_table_entry_t options[] = {
120         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
121         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
122         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
123         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
124         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
125         LC_OPT_LAST
126 };
127
128 static be_module_list_entry_t *copyopts = NULL;
129 static const co_algo_info *selected_copyopt = NULL;
130
131 void be_register_copyopt(const char *name, co_algo_info *copyopt)
132 {
133         if (selected_copyopt == NULL)
134                 selected_copyopt = copyopt;
135         be_add_module_to_list(&copyopts, name, copyopt);
136 }
137
138 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt)
139 void be_init_copyopt(void)
140 {
141         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
142         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
143         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
144         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
145
146         lc_opt_add_table(co_grp, options);
147         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
148                                        &copyopts, (void**) &selected_copyopt);
149 }
150
151 static int void_algo(copy_opt_t *co)
152 {
153         (void) co;
154         return 0;
155 }
156
157 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone)
158 void be_init_copynone(void)
159 {
160         static co_algo_info copyheur = {
161                 void_algo, 0
162         };
163
164         be_register_copyopt("none", &copyheur);
165 }
166
167 #undef QUICK_AND_DIRTY_HACK
168
169 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
170 {
171         if (env->ifg)
172                 return be_ifg_connected(env->ifg, a, b);
173         else {
174                 be_lv_t *lv = be_get_irg_liveness(env->irg);
175                 return be_values_interfere(lv, a, b);
176         }
177 }
178
179
180 /******************************************************************************
181     _____                           _
182    / ____|                         | |
183   | |  __  ___ _ __   ___ _ __ __ _| |
184   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
185   | |__| |  __/ | | |  __/ | | (_| | |
186    \_____|\___|_| |_|\___|_|  \__,_|_|
187
188  ******************************************************************************/
189
190 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
191
192
193 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
194 {
195         const char *s1, *s2, *s3;
196         size_t len;
197         copy_opt_t *co;
198
199         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
200
201         co = XMALLOCZ(copy_opt_t);
202         co->cenv      = chordal_env;
203         co->irg       = chordal_env->irg;
204         co->cls       = chordal_env->cls;
205         co->get_costs = get_costs;
206
207         s1 = get_irp_name();
208         s2 = get_entity_name(get_irg_entity(co->irg));
209         s3 = chordal_env->cls->name;
210         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
211         co->name = XMALLOCN(char, len);
212         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
213
214         return co;
215 }
216
217 void free_copy_opt(copy_opt_t *co)
218 {
219         xfree(co->name);
220         free(co);
221 }
222
223 /**
224  * Checks if a node is optimizable, viz. has something to do with coalescing
225  * @param irn  The irn to check
226  */
227 static int co_is_optimizable_root(ir_node *irn)
228 {
229         const arch_register_req_t *req;
230
231         if (arch_irn_is_ignore(irn))
232                 return 0;
233
234         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
235                 return 1;
236
237         req = arch_get_irn_register_req(irn);
238         if (is_2addr_code(req))
239                 return 1;
240
241         return 0;
242 }
243
244 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
245 {
246         int cost = 0;
247         ir_loop *loop;
248         ir_node *root_block = get_nodes_block(root);
249         (void) co;
250         (void) arg;
251
252         if (is_Phi(root)) {
253                 /* for phis the copies are placed in the corresponding pred-block */
254                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
255         } else {
256                 /* a perm places the copy in the same block as it resides */
257                 loop = get_irn_loop(root_block);
258         }
259         if (loop) {
260                 int d = get_loop_depth(loop);
261                 cost = d*d;
262         }
263         return 1+cost;
264 }
265
266 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
267 {
268         int res;
269         ir_node *root_bl = get_nodes_block(root);
270         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
271         ir_exec_freq *exec_freq = be_get_irg_exec_freq(co->cenv->irg);
272         (void) arg;
273         res = get_block_execfreq_ulong(exec_freq, copy_bl);
274
275         /* don't allow values smaller than one. */
276         return res < 1 ? 1 : res;
277 }
278
279
280 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos)
281 {
282         (void) co;
283         (void) root;
284         (void) arg;
285         (void) pos;
286         return 1;
287 }
288
289 /******************************************************************************
290    ____        _   _    _       _ _          _____ _
291   / __ \      | | | |  | |     (_) |        / ____| |
292  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
293  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
294  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
295   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
296         | |                                                            __/ |
297         |_|                                                           |___/
298  ******************************************************************************/
299
300 /**
301  * Determines a maximum weighted independent set with respect to
302  * the interference and conflict edges of all nodes in a qnode.
303  */
304 static int ou_max_ind_set_costs(unit_t *ou)
305 {
306         be_chordal_env_t *chordal_env = ou->co->cenv;
307         ir_node **safe, **unsafe;
308         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
309         bitset_t *curr;
310         size_t  pos;
311         int curr_weight, best_weight = 0;
312
313         /* assign the nodes into two groups.
314          * safe: node has no interference, hence it is in every max stable set.
315          * unsafe: node has an interference
316          */
317         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
318         safe_costs   = 0;
319         safe_count   = 0;
320         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
321         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
322         unsafe_count = 0;
323         for (i=1; i<ou->node_count; ++i) {
324                 int is_safe = 1;
325                 for (o=1; o<ou->node_count; ++o) {
326                         if (i==o)
327                                 continue;
328                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
329                                 unsafe_costs[unsafe_count] = ou->costs[i];
330                                 unsafe[unsafe_count] = ou->nodes[i];
331                                 ++unsafe_count;
332                                 is_safe = 0;
333                                 break;
334                         }
335                 }
336                 if (is_safe) {
337                         safe_costs += ou->costs[i];
338                         safe[safe_count++] = ou->nodes[i];
339                 }
340         }
341
342
343         /* now compute the best set out of the unsafe nodes*/
344         if (unsafe_count > MIS_HEUR_TRIGGER) {
345                 bitset_t *best = bitset_alloca(unsafe_count);
346                 /* Heuristic: Greedy trial and error form index 0 to unsafe_count-1 */
347                 for (i=0; i<unsafe_count; ++i) {
348                         bitset_set(best, i);
349                         /* check if it is a stable set */
350                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
351                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
352                                         bitset_clear(best, i); /* clear the bit and try next one */
353                                         break;
354                                 }
355                 }
356                 /* compute the weight */
357                 bitset_foreach(best, pos)
358                         best_weight += unsafe_costs[pos];
359         } else {
360                 /* Exact Algorithm: Brute force */
361                 curr = bitset_alloca(unsafe_count);
362                 bitset_set_all(curr);
363                 while (bitset_popcount(curr) != 0) {
364                         /* check if curr is a stable set */
365                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
366                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
367                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
368                                                         goto no_stable_set;
369
370                         /* if we arrive here, we have a stable set */
371                         /* compute the weight of the stable set*/
372                         curr_weight = 0;
373                         bitset_foreach(curr, pos)
374                                 curr_weight += unsafe_costs[pos];
375
376                         /* any better ? */
377                         if (curr_weight > best_weight) {
378                                 best_weight = curr_weight;
379                         }
380
381         no_stable_set:
382                         bitset_minus1(curr);
383                 }
384         }
385
386         return safe_costs+best_weight;
387 }
388
389 static void co_collect_units(ir_node *irn, void *env)
390 {
391         const arch_register_req_t *req;
392         copy_opt_t                *co  = (copy_opt_t*)env;
393         unit_t *unit;
394
395         if (get_irn_mode(irn) == mode_T)
396                 return;
397         req = arch_get_irn_register_req(irn);
398         if (req->cls != co->cls)
399                 return;
400         if (!co_is_optimizable_root(irn))
401                 return;
402
403         /* Init a new unit */
404         unit = XMALLOCZ(unit_t);
405         unit->co = co;
406         unit->node_count = 1;
407         INIT_LIST_HEAD(&unit->queue);
408
409         /* Phi with some/all of its arguments */
410         if (is_Reg_Phi(irn)) {
411                 int i, arity;
412
413                 /* init */
414                 arity = get_irn_arity(irn);
415                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
416                 unit->costs = XMALLOCN(int,      arity + 1);
417                 unit->nodes[0] = irn;
418
419                 /* fill */
420                 for (i=0; i<arity; ++i) {
421                         int o, arg_pos;
422                         ir_node *arg = get_irn_n(irn, i);
423
424                         assert(arch_get_irn_reg_class(arg) == co->cls && "Argument not in same register class.");
425                         if (arg == irn)
426                                 continue;
427                         if (nodes_interfere(co->cenv, irn, arg)) {
428                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
429                                 continue;
430                         }
431
432                         /* Else insert the argument of the phi to the members of this ou */
433                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
434
435                         if (arch_irn_is_ignore(arg))
436                                 continue;
437
438                         /* Check if arg has occurred at a prior position in the arg/list */
439                         arg_pos = 0;
440                         for (o=1; o<unit->node_count; ++o) {
441                                 if (unit->nodes[o] == arg) {
442                                         arg_pos = o;
443                                         break;
444                                 }
445                         }
446
447                         if (!arg_pos) { /* a new argument */
448                                 /* insert node, set costs */
449                                 unit->nodes[unit->node_count] = arg;
450                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
451                                 unit->node_count++;
452                         } else { /* arg has occurred before in same phi */
453                                 /* increase costs for existing arg */
454                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
455                         }
456                 }
457                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
458                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
459         } else if (is_Perm_Proj(irn)) {
460                 /* Proj of a perm with corresponding arg */
461                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
462                 unit->nodes = XMALLOCN(ir_node*, 2);
463                 unit->costs = XMALLOCN(int,      2);
464                 unit->node_count = 2;
465                 unit->nodes[0] = irn;
466                 unit->nodes[1] = get_Perm_src(irn);
467                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
468         } else {
469                 /* Src == Tgt of a 2-addr-code instruction */
470                 if (is_2addr_code(req)) {
471                         const unsigned other = req->other_same;
472                         int            count = 0;
473                         int            i;
474
475                         for (i = 0; (1U << i) <= other; ++i) {
476                                 if (other & (1U << i)) {
477                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
478                                         if (arch_irn_is_ignore(o))
479                                                 continue;
480                                         if (nodes_interfere(co->cenv, irn, o))
481                                                 continue;
482                                         ++count;
483                                 }
484                         }
485
486                         if (count != 0) {
487                                 int k = 0;
488                                 ++count;
489                                 unit->nodes = XMALLOCN(ir_node*, count);
490                                 unit->costs = XMALLOCN(int,      count);
491                                 unit->node_count = count;
492                                 unit->nodes[k++] = irn;
493
494                                 for (i = 0; 1U << i <= other; ++i) {
495                                         if (other & (1U << i)) {
496                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
497                                                 if (!arch_irn_is_ignore(o) &&
498                                                                 !nodes_interfere(co->cenv, irn, o)) {
499                                                         unit->nodes[k] = o;
500                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
501                                                         ++k;
502                                                 }
503                                         }
504                                 }
505                         }
506                 } else {
507                         assert(0 && "This is not an optimizable node!");
508                 }
509         }
510
511         /* Insert the new unit at a position according to its costs */
512         if (unit->node_count > 1) {
513                 int i;
514                 struct list_head *tmp;
515
516                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
517                 for (i=1; i<unit->node_count; ++i) {
518                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
519                         unit->all_nodes_costs += unit->costs[i];
520                 }
521
522                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
523                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
524                 /* Insert the new ou according to its sort_key */
525                 tmp = &co->units;
526                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
527                         tmp = tmp->next;
528                 list_add(&unit->units, tmp);
529         } else {
530                 free(unit);
531         }
532 }
533
534 #ifdef QUICK_AND_DIRTY_HACK
535
536 static int compare_ous(const void *k1, const void *k2)
537 {
538         const unit_t *u1 = *((const unit_t **) k1);
539         const unit_t *u2 = *((const unit_t **) k2);
540         int i, o, u1_has_constr, u2_has_constr;
541         arch_register_req_t req;
542
543         /* Units with constraints come first */
544         u1_has_constr = 0;
545         for (i=0; i<u1->node_count; ++i) {
546                 arch_get_irn_register_req(&req, u1->nodes[i]);
547                 if (arch_register_req_is(&req, limited)) {
548                         u1_has_constr = 1;
549                         break;
550                 }
551         }
552
553         u2_has_constr = 0;
554         for (i=0; i<u2->node_count; ++i) {
555                 arch_get_irn_register_req(&req, u2->nodes[i]);
556                 if (arch_register_req_is(&req, limited)) {
557                         u2_has_constr = 1;
558                         break;
559                 }
560         }
561
562         if (u1_has_constr != u2_has_constr)
563                 return u2_has_constr - u1_has_constr;
564
565         /* Now check, whether the two units are connected */
566 #if 0
567         for (i=0; i<u1->node_count; ++i)
568                 for (o=0; o<u2->node_count; ++o)
569                         if (u1->nodes[i] == u2->nodes[o])
570                                 return 0;
571 #endif
572
573         /* After all, the sort key decides. Greater keys come first. */
574         return u2->sort_key - u1->sort_key;
575
576 }
577
578 /**
579  * Sort the ou's according to constraints and their sort_key
580  */
581 static void co_sort_units(copy_opt_t *co)
582 {
583         int i, count = 0, costs;
584         unit_t *ou, **ous;
585
586         /* get the number of ous, remove them form the list and fill the array */
587         list_for_each_entry(unit_t, ou, &co->units, units)
588                 count++;
589         ous = ALLOCAN(unit_t, count);
590
591         costs = co_get_max_copy_costs(co);
592
593         i = 0;
594         list_for_each_entry(unit_t, ou, &co->units, units)
595                 ous[i++] = ou;
596
597         INIT_LIST_HEAD(&co->units);
598
599         assert(count == i && list_empty(&co->units));
600
601         for (i=0; i<count; ++i)
602                 ir_printf("%+F\n", ous[i]->nodes[0]);
603
604         qsort(ous, count, sizeof(*ous), compare_ous);
605
606         ir_printf("\n\n");
607         for (i=0; i<count; ++i)
608                 ir_printf("%+F\n", ous[i]->nodes[0]);
609
610         /* reinsert into list in correct order */
611         for (i=0; i<count; ++i)
612                 list_add_tail(&ous[i]->units, &co->units);
613
614         assert(costs == co_get_max_copy_costs(co));
615 }
616 #endif
617
618 void co_build_ou_structure(copy_opt_t *co)
619 {
620         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
621         INIT_LIST_HEAD(&co->units);
622         irg_walk_graph(co->irg, co_collect_units, NULL, co);
623 #ifdef QUICK_AND_DIRTY_HACK
624         co_sort_units(co);
625 #endif
626 }
627
628 void co_free_ou_structure(copy_opt_t *co)
629 {
630         unit_t *curr, *tmp;
631         ASSERT_OU_AVAIL(co);
632         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
633                 xfree(curr->nodes);
634                 xfree(curr->costs);
635                 xfree(curr);
636         }
637         co->units.next = NULL;
638 }
639
640 /* co_solve_heuristic() is implemented in becopyheur.c */
641
642 int co_get_max_copy_costs(const copy_opt_t *co)
643 {
644         int i, res = 0;
645         unit_t *curr;
646
647         ASSERT_OU_AVAIL(co);
648
649         list_for_each_entry(unit_t, curr, &co->units, units) {
650                 res += curr->inevitable_costs;
651                 for (i=1; i<curr->node_count; ++i)
652                         res += curr->costs[i];
653         }
654         return res;
655 }
656
657 int co_get_inevit_copy_costs(const copy_opt_t *co)
658 {
659         int res = 0;
660         unit_t *curr;
661
662         ASSERT_OU_AVAIL(co);
663
664         list_for_each_entry(unit_t, curr, &co->units, units)
665                 res += curr->inevitable_costs;
666         return res;
667 }
668
669 int co_get_copy_costs(const copy_opt_t *co)
670 {
671         int i, res = 0;
672         unit_t *curr;
673
674         ASSERT_OU_AVAIL(co);
675
676         list_for_each_entry(unit_t, curr, &co->units, units) {
677                 int root_col = get_irn_col(curr->nodes[0]);
678                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
679                 res += curr->inevitable_costs;
680                 for (i=1; i<curr->node_count; ++i) {
681                         int arg_col = get_irn_col(curr->nodes[i]);
682                         if (root_col != arg_col) {
683                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
684                                 res += curr->costs[i];
685                         }
686                 }
687         }
688         return res;
689 }
690
691 int co_get_lower_bound(const copy_opt_t *co)
692 {
693         int res = 0;
694         unit_t *curr;
695
696         ASSERT_OU_AVAIL(co);
697
698         list_for_each_entry(unit_t, curr, &co->units, units)
699                 res += curr->inevitable_costs + curr->min_nodes_costs;
700         return res;
701 }
702
703 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
704 {
705         bitset_t *seen = bitset_malloc(get_irg_last_idx(co->irg));
706         affinity_node_t *an;
707
708         memset(stat, 0, sizeof(stat[0]));
709
710         /* count affinity edges. */
711         co_gs_foreach_aff_node(co, an) {
712                 neighb_t *neigh;
713                 stat->aff_nodes += 1;
714                 bitset_set(seen, get_irn_idx(an->irn));
715                 co_gs_foreach_neighb(an, neigh) {
716                         if (!bitset_is_set(seen, get_irn_idx(neigh->irn))) {
717                                 stat->aff_edges += 1;
718                                 stat->max_costs += neigh->costs;
719
720                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
721                                         stat->costs += neigh->costs;
722                                         stat->unsatisfied_edges += 1;
723                                 }
724
725                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
726                                         stat->aff_int += 1;
727                                         stat->inevit_costs += neigh->costs;
728                                 }
729
730                         }
731                 }
732         }
733
734         bitset_free(seen);
735 }
736
737 /******************************************************************************
738    _____                 _        _____ _
739   / ____|               | |      / ____| |
740  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
741  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
742  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
743   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
744                   | |                                       __/ |
745                   |_|                                      |___/
746  ******************************************************************************/
747
748 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
749 {
750         const affinity_node_t *n1 = (const affinity_node_t*)k1;
751         const affinity_node_t *n2 = (const affinity_node_t*)k2;
752         (void) size;
753
754         return (n1->irn != n2->irn);
755 }
756
757 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
758 {
759         affinity_node_t new_node, *node;
760         neighb_t        *nbr;
761         int             allocnew = 1;
762
763         new_node.irn        = n1;
764         new_node.degree     = 0;
765         new_node.neighbours = NULL;
766         node = (affinity_node_t*)set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
767
768         for (nbr = node->neighbours; nbr; nbr = nbr->next)
769                 if (nbr->irn == n2) {
770                         allocnew = 0;
771                         break;
772                 }
773
774         /* if we did not find n2 in n1's neighbourhood insert it */
775         if (allocnew) {
776                 nbr        = OALLOC(&co->obst, neighb_t);
777                 nbr->irn   = n2;
778                 nbr->costs = 0;
779                 nbr->next  = node->neighbours;
780
781                 node->neighbours = nbr;
782                 node->degree++;
783         }
784
785         /* now nbr points to n1's neighbour-entry of n2 */
786         nbr->costs += costs;
787 }
788
789 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
790 {
791         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
792                 add_edge(co, n1, n2, costs);
793                 add_edge(co, n2, n1, costs);
794         }
795 }
796
797 static void build_graph_walker(ir_node *irn, void *env)
798 {
799         const arch_register_req_t *req;
800         copy_opt_t                *co  = (copy_opt_t*)env;
801         int pos, max;
802
803         if (get_irn_mode(irn) == mode_T)
804                 return;
805         req = arch_get_irn_register_req(irn);
806         if (req->cls != co->cls || arch_irn_is_ignore(irn))
807                 return;
808
809         if (is_Reg_Phi(irn)) { /* Phis */
810                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
811                         ir_node *arg = get_irn_n(irn, pos);
812                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
813                 }
814         } else if (is_Perm_Proj(irn)) { /* Perms */
815                 ir_node *arg = get_Perm_src(irn);
816                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
817         } else { /* 2-address code */
818                 if (is_2addr_code(req)) {
819                         const unsigned other = req->other_same;
820                         int i;
821
822                         for (i = 0; 1U << i <= other; ++i) {
823                                 if (other & (1U << i)) {
824                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
825                                         if (!arch_irn_is_ignore(other))
826                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
827                                 }
828                         }
829                 }
830         }
831 }
832
833 void co_build_graph_structure(copy_opt_t *co)
834 {
835         obstack_init(&co->obst);
836         co->nodes = new_set(compare_affinity_node_t, 32);
837
838         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
839 }
840
841 void co_free_graph_structure(copy_opt_t *co)
842 {
843         ASSERT_GS_AVAIL(co);
844
845         del_set(co->nodes);
846         obstack_free(&co->obst, NULL);
847         co->nodes = NULL;
848 }
849
850 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
851 {
852         affinity_node_t new_node, *n;
853
854         ASSERT_GS_AVAIL(co);
855
856         new_node.irn = irn;
857         n = (affinity_node_t*)set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
858         if (n) {
859                 return (n->degree > 0);
860         } else
861                 return 0;
862 }
863
864 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
865 {
866         ir_node *nodes[]  = { a, b };
867         bitset_t *constr[] = { NULL, NULL };
868         int j;
869
870         constr[0] = bitset_alloca(co->cls->n_regs);
871         constr[1] = bitset_alloca(co->cls->n_regs);
872
873         for (j = 0; j < 2; ++j) {
874                 const arch_register_req_t *req = arch_get_irn_register_req(nodes[j]);
875                 if (arch_register_req_is(req, limited))
876                         rbitset_copy_to_bitset(req->limited, constr[j]);
877                 else
878                         bitset_set_all(constr[j]);
879
880         }
881
882         return !bitset_intersect(constr[0], constr[1]);
883 }
884
885 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
886 {
887         be_ifg_t *ifg       = co->cenv->ifg;
888         int      *color_map = ALLOCAN(int, co->cls->n_regs);
889         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
890         ir_graph *irg       = co->irg;
891         be_irg_t *birg      = be_birg_from_irg(irg);
892
893         ir_node *irn;
894         nodes_iter_t it;
895         neighbours_iter_t nit;
896         int n, n_regs;
897         unsigned i;
898
899         n_regs = 0;
900         for (i = 0; i < co->cls->n_regs; ++i) {
901                 const arch_register_t *reg = &co->cls->regs[i];
902                 if (rbitset_is_set(birg->allocatable_regs, reg->global_index)) {
903                         color_map[i] = n_regs++;
904                 } else {
905                         color_map[i] = -1;
906                 }
907         }
908
909         /*
910          * n contains the first node number.
911          * the values below n are the pre-colored register nodes
912          */
913
914         n = n_regs;
915         be_ifg_foreach_node(ifg, &it, irn) {
916                 if (arch_irn_is_ignore(irn))
917                         continue;
918                 node_map[get_irn_idx(irn)] = n++;
919         }
920
921         fprintf(f, "%d %d\n", n, n_regs);
922
923         be_ifg_foreach_node(ifg, &it, irn) {
924                 if (!arch_irn_is_ignore(irn)) {
925                         int idx                        = node_map[get_irn_idx(irn)];
926                         affinity_node_t           *a   = get_affinity_info(co, irn);
927                         const arch_register_req_t *req = arch_get_irn_register_req(irn);
928                         ir_node                   *adj;
929
930                         if (arch_register_req_is(req, limited)) {
931                                 for (i = 0; i < co->cls->n_regs; ++i) {
932                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
933                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
934                                 }
935                         }
936
937                         be_ifg_foreach_neighbour(ifg, &nit, irn, adj) {
938                                 if (!arch_irn_is_ignore(adj) &&
939                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
940                                         int adj_idx = node_map[get_irn_idx(adj)];
941                                         if (idx < adj_idx)
942                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
943                                 }
944                         }
945
946                         if (a) {
947                                 neighb_t *n;
948
949                                 co_gs_foreach_neighb(a, n) {
950                                         if (!arch_irn_is_ignore(n->irn)) {
951                                                 int n_idx = node_map[get_irn_idx(n->irn)];
952                                                 if (idx < n_idx)
953                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
954                                         }
955                                 }
956                         }
957                 }
958         }
959
960         xfree(node_map);
961 }
962
963 /*
964          ___ _____ ____   ____   ___ _____   ____                        _
965         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
966          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
967          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
968         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
969                                                                   |_|            |___/
970 */
971
972 static const char *get_dot_color_name(size_t col)
973 {
974         static const char *names[] = {
975                 "blue",
976                 "red",
977                 "green",
978                 "yellow",
979                 "cyan",
980                 "magenta",
981                 "orange",
982                 "chocolate",
983                 "beige",
984                 "navy",
985                 "darkgreen",
986                 "darkred",
987                 "lightPink",
988                 "chartreuse",
989                 "lightskyblue",
990                 "linen",
991                 "pink",
992                 "lightslateblue",
993                 "mintcream",
994                 "red",
995                 "darkolivegreen",
996                 "mediumblue",
997                 "mistyrose",
998                 "salmon",
999                 "darkseagreen",
1000                 "mediumslateblue"
1001                 "moccasin",
1002                 "tomato",
1003                 "forestgreen",
1004                 "darkturquoise",
1005                 "palevioletred"
1006         };
1007
1008         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1009 }
1010
1011 typedef struct co_ifg_dump_t {
1012         const copy_opt_t *co;
1013         unsigned flags;
1014 } co_ifg_dump_t;
1015
1016 static void ifg_dump_graph_attr(FILE *f, void *self)
1017 {
1018         (void) self;
1019         fprintf(f, "overlap=scale");
1020 }
1021
1022 static int ifg_is_dump_node(void *self, ir_node *irn)
1023 {
1024         (void)self;
1025         return !arch_irn_is_ignore(irn);
1026 }
1027
1028 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1029 {
1030         co_ifg_dump_t             *env     = (co_ifg_dump_t*)self;
1031         const arch_register_t     *reg     = arch_get_irn_register(irn);
1032         const arch_register_req_t *req     = arch_get_irn_register_req(irn);
1033         int                        limited = arch_register_req_is(req, limited);
1034
1035         if (env->flags & CO_IFG_DUMP_LABELS) {
1036                 ir_fprintf(f, "label=\"%+F", irn);
1037
1038                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1039                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1040                         rbitset_copy_to_bitset(req->limited, bs);
1041                         ir_fprintf(f, "\\n%B", bs);
1042                 }
1043                 ir_fprintf(f, "\" ");
1044         } else {
1045                 fprintf(f, "label=\"\" shape=point " );
1046         }
1047
1048         if (env->flags & CO_IFG_DUMP_SHAPE)
1049                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1050
1051         if (env->flags & CO_IFG_DUMP_COLORS)
1052                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1053 }
1054
1055 static void ifg_dump_at_end(FILE *file, void *self)
1056 {
1057         co_ifg_dump_t *env = (co_ifg_dump_t*)self;
1058         affinity_node_t *a;
1059
1060         co_gs_foreach_aff_node(env->co, a) {
1061                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1062                 unsigned aidx = get_irn_idx(a->irn);
1063                 neighb_t *n;
1064
1065                 co_gs_foreach_neighb(a, n) {
1066                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1067                         unsigned nidx = get_irn_idx(n->irn);
1068
1069                         if (aidx < nidx) {
1070                                 const char *color = nr == ar ? "blue" : "red";
1071                                 fprintf(file, "\tn%u -- n%u [weight=0.01 ", aidx, nidx);
1072                                 if (env->flags & CO_IFG_DUMP_LABELS)
1073                                         fprintf(file, "label=\"%d\" ", n->costs);
1074                                 if (env->flags & CO_IFG_DUMP_COLORS)
1075                                         fprintf(file, "color=%s ", color);
1076                                 else
1077                                         fprintf(file, "style=dotted");
1078                                 fprintf(file, "];\n");
1079                         }
1080                 }
1081         }
1082 }
1083
1084
1085 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1086         ifg_is_dump_node,
1087         ifg_dump_graph_attr,
1088         ifg_dump_node_attr,
1089         NULL,
1090         NULL,
1091         ifg_dump_at_end
1092 };
1093
1094
1095
1096 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1097 {
1098         co_ifg_dump_t cod;
1099
1100         cod.co    = co;
1101         cod.flags = flags;
1102         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1103 }
1104
1105
1106 void co_solve_park_moon(copy_opt_t *opt)
1107 {
1108         (void) opt;
1109 }
1110
1111 /*
1112     __  __       _         ____       _
1113    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1114    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1115    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1116    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1117
1118 */
1119
1120 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1121 {
1122         FILE *result;
1123         char buf[1024];
1124         size_t i, n;
1125         char *tu_name;
1126         const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
1127
1128         n = strlen(cup_name);
1129         tu_name = XMALLOCN(char, n + 1);
1130         strcpy(tu_name, cup_name);
1131         for (i = 0; i < n; ++i)
1132                 if (tu_name[i] == '.')
1133                         tu_name[i] = '_';
1134
1135
1136         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1137         xfree(tu_name);
1138         result = fopen(buf, "wt");
1139         if (result == NULL) {
1140                 panic("Couldn't open '%s' for writing.", buf);
1141         }
1142
1143         return result;
1144 }
1145
1146 void co_driver(be_chordal_env_t *cenv)
1147 {
1148         ir_timer_t          *timer = ir_timer_new();
1149         co_complete_stats_t before, after;
1150         copy_opt_t          *co;
1151         int                 was_optimal = 0;
1152
1153         assert(selected_copyopt);
1154
1155         /* skip copymin if algo is 'none' */
1156         if (selected_copyopt->copyopt == void_algo)
1157                 return;
1158
1159         be_liveness_assure_chk(be_get_irg_liveness(cenv->irg));
1160
1161         co = new_copy_opt(cenv, cost_func);
1162         co_build_ou_structure(co);
1163         co_build_graph_structure(co);
1164
1165         co_complete_stats(co, &before);
1166
1167         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1168         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1169         be_stat_ev_ull("co_max_costs",    before.max_costs);
1170         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1171         be_stat_ev_ull("co_aff_int",      before.aff_int);
1172
1173         be_stat_ev_ull("co_init_costs",   before.costs);
1174         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1175
1176         if (dump_flags & DUMP_BEFORE) {
1177                 FILE *f = my_open(cenv, "", "-before.dot");
1178                 co_dump_ifg_dot(co, f, style_flags);
1179                 fclose(f);
1180         }
1181
1182         /* if the algo can improve results, provide an initial solution with heur1 */
1183         if (improve && selected_copyopt->can_improve_existing) {
1184                 co_complete_stats_t stats;
1185
1186                 /* produce a heuristic solution */
1187                 co_solve_heuristic(co);
1188
1189                 /* do the stats and provide the current costs */
1190                 co_complete_stats(co, &stats);
1191                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1192         }
1193
1194         /* perform actual copy minimization */
1195         ir_timer_reset_and_start(timer);
1196         was_optimal = selected_copyopt->copyopt(co);
1197         ir_timer_stop(timer);
1198
1199         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1200         be_stat_ev_ull("co_optimal", was_optimal);
1201         ir_timer_free(timer);
1202
1203         if (dump_flags & DUMP_AFTER) {
1204                 FILE *f = my_open(cenv, "", "-after.dot");
1205                 co_dump_ifg_dot(co, f, style_flags);
1206                 fclose(f);
1207         }
1208
1209         co_complete_stats(co, &after);
1210
1211         if (do_stats) {
1212                 unsigned long long optimizable_costs = after.max_costs - after.inevit_costs;
1213                 unsigned long long evitable          = after.costs     - after.inevit_costs;
1214
1215                 ir_printf("%30F ", cenv->irg);
1216                 printf("%10s %10llu%10llu%10llu", cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1217
1218                 if (optimizable_costs > 0)
1219                         printf("%10llu %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1220                 else
1221                         printf("%10llu %5s\n", after.costs, "-");
1222         }
1223
1224         /* Dump the interference graph in Appel's format. */
1225         if (dump_flags & DUMP_APPEL) {
1226                 FILE *f = my_open(cenv, "", ".apl");
1227                 fprintf(f, "# %llu %llu\n", after.costs, after.unsatisfied_edges);
1228                 co_dump_appel_graph(co, f);
1229                 fclose(f);
1230         }
1231
1232         be_stat_ev_ull("co_after_costs", after.costs);
1233         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1234
1235         co_free_graph_structure(co);
1236         co_free_ou_structure(co);
1237         free_copy_opt(co);
1238 }