Remove address name SymConsts.
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode.h"
52 #include "beutil.h"
53 #include "beifg_t.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static int        improve     = 1;
81
82 static const lc_opt_enum_mask_items_t dump_items[] = {
83         { "before",  DUMP_BEFORE },
84         { "after",   DUMP_AFTER  },
85         { "appel",   DUMP_APPEL  },
86         { "all",     DUMP_ALL    },
87         { NULL,      0 }
88 };
89
90 static const lc_opt_enum_mask_items_t style_items[] = {
91         { "color",   CO_IFG_DUMP_COLORS },
92         { "labels",  CO_IFG_DUMP_LABELS },
93         { "constr",  CO_IFG_DUMP_CONSTR },
94         { "shape",   CO_IFG_DUMP_SHAPE  },
95         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
96         { NULL,      0 }
97 };
98
99 typedef int (*opt_funcptr)(void);
100
101 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
102         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
103         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
104         { "one",    (opt_funcptr) co_get_costs_all_one },
105         { NULL,     NULL }
106 };
107
108 static lc_opt_enum_mask_var_t dump_var = {
109         &dump_flags, dump_items
110 };
111
112 static lc_opt_enum_mask_var_t style_var = {
113         &style_flags, style_items
114 };
115
116 static lc_opt_enum_func_ptr_var_t cost_func_var = {
117         (opt_funcptr*) &cost_func, cost_func_items
118 };
119
120 static const lc_opt_table_entry_t options[] = {
121         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
122         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
123         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
124         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
125         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
126         LC_OPT_LAST
127 };
128
129 static be_module_list_entry_t *copyopts = NULL;
130 static const co_algo_info *selected_copyopt = NULL;
131
132 void be_register_copyopt(const char *name, co_algo_info *copyopt)
133 {
134         if (selected_copyopt == NULL)
135                 selected_copyopt = copyopt;
136         be_add_module_to_list(&copyopts, name, copyopt);
137 }
138
139 void be_init_copyopt(void)
140 {
141         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
142         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
143         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
144         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
145
146         lc_opt_add_table(co_grp, options);
147         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
148                                        &copyopts, (void**) &selected_copyopt);
149 }
150
151 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt);
152
153 static int void_algo(copy_opt_t *co)
154 {
155         (void) co;
156         return 0;
157 }
158
159 void be_init_copynone(void)
160 {
161         static co_algo_info copyheur = {
162                 void_algo, 0
163         };
164
165         be_register_copyopt("none", &copyheur);
166 }
167
168 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone);
169
170 #undef QUICK_AND_DIRTY_HACK
171
172 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
173 {
174         if (env->ifg)
175                 return be_ifg_connected(env->ifg, a, b);
176         else
177                 return be_values_interfere(env->birg->lv, a, b);
178 }
179
180
181 /******************************************************************************
182     _____                           _
183    / ____|                         | |
184   | |  __  ___ _ __   ___ _ __ __ _| |
185   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
186   | |__| |  __/ | | |  __/ | | (_| | |
187    \_____|\___|_| |_|\___|_|  \__,_|_|
188
189  ******************************************************************************/
190
191 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
192
193
194 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
195 {
196         const char *s1, *s2, *s3;
197         int len;
198         copy_opt_t *co;
199
200         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
201
202         co = XMALLOCZ(copy_opt_t);
203         co->cenv      = chordal_env;
204         co->irg       = chordal_env->irg;
205         co->cls       = chordal_env->cls;
206         co->get_costs = get_costs;
207
208         s1 = get_irp_name();
209         s2 = get_entity_name(get_irg_entity(co->irg));
210         s3 = chordal_env->cls->name;
211         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
212         co->name = XMALLOCN(char, len);
213         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
214
215         return co;
216 }
217
218 void free_copy_opt(copy_opt_t *co)
219 {
220         xfree(co->name);
221         free(co);
222 }
223
224 /**
225  * Checks if a node is optimizable, viz. has something to do with coalescing
226  * @param irn  The irn to check
227  */
228 static int co_is_optimizable_root(ir_node *irn)
229 {
230         const arch_register_req_t *req;
231         const arch_register_t     *reg;
232
233         if (arch_irn_is_ignore(irn))
234                 return 0;
235
236         reg = arch_get_irn_register(irn);
237         if (arch_register_type_is(reg, ignore))
238                 return 0;
239
240         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
241                 return 1;
242
243         req = arch_get_register_req_out(irn);
244         if (is_2addr_code(req))
245                 return 1;
246
247         return 0;
248 }
249
250 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
251 {
252         int cost = 0;
253         ir_loop *loop;
254         ir_node *root_block = get_nodes_block(root);
255         (void) co;
256         (void) arg;
257
258         if (is_Phi(root)) {
259                 /* for phis the copies are placed in the corresponding pred-block */
260                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
261         } else {
262                 /* a perm places the copy in the same block as it resides */
263                 loop = get_irn_loop(root_block);
264         }
265         if (loop) {
266                 int d = get_loop_depth(loop);
267                 cost = d*d;
268         }
269         return 1+cost;
270 }
271
272 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
273 {
274         int res;
275         ir_node *root_bl = get_nodes_block(root);
276         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
277         (void) arg;
278         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
279
280         /* don't allow values smaller than one. */
281         return res < 1 ? 1 : res;
282 }
283
284
285 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos)
286 {
287         (void) co;
288         (void) root;
289         (void) arg;
290         (void) pos;
291         return 1;
292 }
293
294 /******************************************************************************
295    ____        _   _    _       _ _          _____ _
296   / __ \      | | | |  | |     (_) |        / ____| |
297  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
298  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
299  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
300   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
301         | |                                                            __/ |
302         |_|                                                           |___/
303  ******************************************************************************/
304
305 /**
306  * Determines a maximum weighted independent set with respect to
307  * the interference and conflict edges of all nodes in a qnode.
308  */
309 static int ou_max_ind_set_costs(unit_t *ou)
310 {
311         be_chordal_env_t *chordal_env = ou->co->cenv;
312         ir_node **safe, **unsafe;
313         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
314         bitset_t *curr;
315         bitset_pos_t pos;
316         int max, curr_weight, best_weight = 0;
317
318         /* assign the nodes into two groups.
319          * safe: node has no interference, hence it is in every max stable set.
320          * unsafe: node has an interference
321          */
322         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
323         safe_costs   = 0;
324         safe_count   = 0;
325         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
326         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
327         unsafe_count = 0;
328         for (i=1; i<ou->node_count; ++i) {
329                 int is_safe = 1;
330                 for (o=1; o<ou->node_count; ++o) {
331                         if (i==o)
332                                 continue;
333                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
334                                 unsafe_costs[unsafe_count] = ou->costs[i];
335                                 unsafe[unsafe_count] = ou->nodes[i];
336                                 ++unsafe_count;
337                                 is_safe = 0;
338                                 break;
339                         }
340                 }
341                 if (is_safe) {
342                         safe_costs += ou->costs[i];
343                         safe[safe_count++] = ou->nodes[i];
344                 }
345         }
346
347
348         /* now compute the best set out of the unsafe nodes*/
349         if (unsafe_count > MIS_HEUR_TRIGGER) {
350                 bitset_t *best = bitset_alloca(unsafe_count);
351                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
352                 for (i=0; i<unsafe_count; ++i) {
353                         bitset_set(best, i);
354                         /* check if it is a stable set */
355                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
356                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
357                                         bitset_clear(best, i); /* clear the bit and try next one */
358                                         break;
359                                 }
360                 }
361                 /* compute the weight */
362                 bitset_foreach(best, pos)
363                         best_weight += unsafe_costs[pos];
364         } else {
365                 /* Exact Algorithm: Brute force */
366                 curr = bitset_alloca(unsafe_count);
367                 bitset_set_all(curr);
368                 while ((max = bitset_popcnt(curr)) != 0) {
369                         /* check if curr is a stable set */
370                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
371                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
372                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
373                                                         goto no_stable_set;
374
375                         /* if we arrive here, we have a stable set */
376                         /* compute the weigth of the stable set*/
377                         curr_weight = 0;
378                         bitset_foreach(curr, pos)
379                                 curr_weight += unsafe_costs[pos];
380
381                         /* any better ? */
382                         if (curr_weight > best_weight) {
383                                 best_weight = curr_weight;
384                         }
385
386         no_stable_set:
387                         bitset_minus1(curr);
388                 }
389         }
390
391         return safe_costs+best_weight;
392 }
393
394 static void co_collect_units(ir_node *irn, void *env)
395 {
396         const arch_register_req_t *req = arch_get_register_req_out(irn);
397         copy_opt_t                *co  = env;
398         unit_t *unit;
399
400         if (req->cls != co->cls)
401                 return;
402         if (!co_is_optimizable_root(irn))
403                 return;
404
405         /* Init a new unit */
406         unit = XMALLOCZ(unit_t);
407         unit->co = co;
408         unit->node_count = 1;
409         INIT_LIST_HEAD(&unit->queue);
410
411         /* Phi with some/all of its arguments */
412         if (is_Reg_Phi(irn)) {
413                 int i, arity;
414
415                 /* init */
416                 arity = get_irn_arity(irn);
417                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
418                 unit->costs = XMALLOCN(int,      arity + 1);
419                 unit->nodes[0] = irn;
420
421                 /* fill */
422                 for (i=0; i<arity; ++i) {
423                         int o, arg_pos;
424                         ir_node *arg = get_irn_n(irn, i);
425
426                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
427                         if (arg == irn)
428                                 continue;
429                         if (nodes_interfere(co->cenv, irn, arg)) {
430                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
431                                 continue;
432                         }
433
434                         /* Else insert the argument of the phi to the members of this ou */
435                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
436
437                         if (arch_irn_is_ignore(arg))
438                                 continue;
439
440                         /* Check if arg has occurred at a prior position in the arg/list */
441                         arg_pos = 0;
442                         for (o=1; o<unit->node_count; ++o) {
443                                 if (unit->nodes[o] == arg) {
444                                         arg_pos = o;
445                                         break;
446                                 }
447                         }
448
449                         if (!arg_pos) { /* a new argument */
450                                 /* insert node, set costs */
451                                 unit->nodes[unit->node_count] = arg;
452                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
453                                 unit->node_count++;
454                         } else { /* arg has occurred before in same phi */
455                                 /* increase costs for existing arg */
456                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
457                         }
458                 }
459                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
460                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
461         } else if (is_Perm_Proj(irn)) {
462                 /* Proj of a perm with corresponding arg */
463                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
464                 unit->nodes = XMALLOCN(ir_node*, 2);
465                 unit->costs = XMALLOCN(int,      2);
466                 unit->node_count = 2;
467                 unit->nodes[0] = irn;
468                 unit->nodes[1] = get_Perm_src(irn);
469                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
470         } else {
471                 /* Src == Tgt of a 2-addr-code instruction */
472                 if (is_2addr_code(req)) {
473                         const unsigned other = req->other_same;
474                         int            count = 0;
475                         int            i;
476
477                         for (i = 0; (1U << i) <= other; ++i) {
478                                 if (other & (1U << i)) {
479                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
480                                         if (arch_irn_is_ignore(o))
481                                                 continue;
482                                         if (nodes_interfere(co->cenv, irn, o))
483                                                 continue;
484                                         ++count;
485                                 }
486                         }
487
488                         if (count != 0) {
489                                 int k = 0;
490                                 ++count;
491                                 unit->nodes = XMALLOCN(ir_node*, count);
492                                 unit->costs = XMALLOCN(int,      count);
493                                 unit->node_count = count;
494                                 unit->nodes[k++] = irn;
495
496                                 for (i = 0; 1U << i <= other; ++i) {
497                                         if (other & (1U << i)) {
498                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
499                                                 if (!arch_irn_is_ignore(o) &&
500                                                                 !nodes_interfere(co->cenv, irn, o)) {
501                                                         unit->nodes[k] = o;
502                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
503                                                         ++k;
504                                                 }
505                                         }
506                                 }
507                         }
508                 } else {
509                         assert(0 && "This is not an optimizable node!");
510                 }
511         }
512
513         /* Insert the new unit at a position according to its costs */
514         if (unit->node_count > 1) {
515                 int i;
516                 struct list_head *tmp;
517
518                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
519                 for (i=1; i<unit->node_count; ++i) {
520                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
521                         unit->all_nodes_costs += unit->costs[i];
522                 }
523
524                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
525                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
526                 /* Insert the new ou according to its sort_key */
527                 tmp = &co->units;
528                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
529                         tmp = tmp->next;
530                 list_add(&unit->units, tmp);
531         } else {
532                 free(unit);
533         }
534 }
535
536 #ifdef QUICK_AND_DIRTY_HACK
537
538 static int compare_ous(const void *k1, const void *k2)
539 {
540         const unit_t *u1 = *((const unit_t **) k1);
541         const unit_t *u2 = *((const unit_t **) k2);
542         int i, o, u1_has_constr, u2_has_constr;
543         arch_register_req_t req;
544
545         /* Units with constraints come first */
546         u1_has_constr = 0;
547         for (i=0; i<u1->node_count; ++i) {
548                 arch_get_register_req_out(&req, u1->nodes[i]);
549                 if (arch_register_req_is(&req, limited)) {
550                         u1_has_constr = 1;
551                         break;
552                 }
553         }
554
555         u2_has_constr = 0;
556         for (i=0; i<u2->node_count; ++i) {
557                 arch_get_register_req_out(&req, u2->nodes[i]);
558                 if (arch_register_req_is(&req, limited)) {
559                         u2_has_constr = 1;
560                         break;
561                 }
562         }
563
564         if (u1_has_constr != u2_has_constr)
565                 return u2_has_constr - u1_has_constr;
566
567         /* Now check, whether the two units are connected */
568 #if 0
569         for (i=0; i<u1->node_count; ++i)
570                 for (o=0; o<u2->node_count; ++o)
571                         if (u1->nodes[i] == u2->nodes[o])
572                                 return 0;
573 #endif
574
575         /* After all, the sort key decides. Greater keys come first. */
576         return u2->sort_key - u1->sort_key;
577
578 }
579
580 /**
581  * Sort the ou's according to constraints and their sort_key
582  */
583 static void co_sort_units(copy_opt_t *co)
584 {
585         int i, count = 0, costs;
586         unit_t *ou, **ous;
587
588         /* get the number of ous, remove them form the list and fill the array */
589         list_for_each_entry(unit_t, ou, &co->units, units)
590                 count++;
591         ous = ALLOCAN(unit_t, count);
592
593         costs = co_get_max_copy_costs(co);
594
595         i = 0;
596         list_for_each_entry(unit_t, ou, &co->units, units)
597                 ous[i++] = ou;
598
599         INIT_LIST_HEAD(&co->units);
600
601         assert(count == i && list_empty(&co->units));
602
603         for (i=0; i<count; ++i)
604                 ir_printf("%+F\n", ous[i]->nodes[0]);
605
606         qsort(ous, count, sizeof(*ous), compare_ous);
607
608         ir_printf("\n\n");
609         for (i=0; i<count; ++i)
610                 ir_printf("%+F\n", ous[i]->nodes[0]);
611
612         /* reinsert into list in correct order */
613         for (i=0; i<count; ++i)
614                 list_add_tail(&ous[i]->units, &co->units);
615
616         assert(costs == co_get_max_copy_costs(co));
617 }
618 #endif
619
620 void co_build_ou_structure(copy_opt_t *co)
621 {
622         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
623         INIT_LIST_HEAD(&co->units);
624         irg_walk_graph(co->irg, co_collect_units, NULL, co);
625 #ifdef QUICK_AND_DIRTY_HACK
626         co_sort_units(co);
627 #endif
628 }
629
630 void co_free_ou_structure(copy_opt_t *co)
631 {
632         unit_t *curr, *tmp;
633         ASSERT_OU_AVAIL(co);
634         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
635                 xfree(curr->nodes);
636                 xfree(curr->costs);
637                 xfree(curr);
638         }
639         co->units.next = NULL;
640 }
641
642 /* co_solve_heuristic() is implemented in becopyheur.c */
643
644 int co_get_max_copy_costs(const copy_opt_t *co)
645 {
646         int i, res = 0;
647         unit_t *curr;
648
649         ASSERT_OU_AVAIL(co);
650
651         list_for_each_entry(unit_t, curr, &co->units, units) {
652                 res += curr->inevitable_costs;
653                 for (i=1; i<curr->node_count; ++i)
654                         res += curr->costs[i];
655         }
656         return res;
657 }
658
659 int co_get_inevit_copy_costs(const copy_opt_t *co)
660 {
661         int res = 0;
662         unit_t *curr;
663
664         ASSERT_OU_AVAIL(co);
665
666         list_for_each_entry(unit_t, curr, &co->units, units)
667                 res += curr->inevitable_costs;
668         return res;
669 }
670
671 int co_get_copy_costs(const copy_opt_t *co)
672 {
673         int i, res = 0;
674         unit_t *curr;
675
676         ASSERT_OU_AVAIL(co);
677
678         list_for_each_entry(unit_t, curr, &co->units, units) {
679                 int root_col = get_irn_col(curr->nodes[0]);
680                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
681                 res += curr->inevitable_costs;
682                 for (i=1; i<curr->node_count; ++i) {
683                         int arg_col = get_irn_col(curr->nodes[i]);
684                         if (root_col != arg_col) {
685                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
686                                 res += curr->costs[i];
687                         }
688                 }
689         }
690         return res;
691 }
692
693 int co_get_lower_bound(const copy_opt_t *co)
694 {
695         int res = 0;
696         unit_t *curr;
697
698         ASSERT_OU_AVAIL(co);
699
700         list_for_each_entry(unit_t, curr, &co->units, units)
701                 res += curr->inevitable_costs + curr->min_nodes_costs;
702         return res;
703 }
704
705 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
706 {
707         bitset_t *seen = bitset_irg_malloc(co->irg);
708         affinity_node_t *an;
709
710         memset(stat, 0, sizeof(stat[0]));
711
712         /* count affinity edges. */
713         co_gs_foreach_aff_node(co, an) {
714                 neighb_t *neigh;
715                 stat->aff_nodes += 1;
716                 bitset_add_irn(seen, an->irn);
717                 co_gs_foreach_neighb(an, neigh) {
718                         if (!bitset_contains_irn(seen, neigh->irn)) {
719                                 stat->aff_edges += 1;
720                                 stat->max_costs += neigh->costs;
721
722                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
723                                         stat->costs += neigh->costs;
724                                         stat->unsatisfied_edges += 1;
725                                 }
726
727                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
728                                         stat->aff_int += 1;
729                                         stat->inevit_costs += neigh->costs;
730                                 }
731
732                         }
733                 }
734         }
735
736         bitset_free(seen);
737 }
738
739 /******************************************************************************
740    _____                 _        _____ _
741   / ____|               | |      / ____| |
742  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
743  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
744  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
745   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
746                   | |                                       __/ |
747                   |_|                                      |___/
748  ******************************************************************************/
749
750 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
751 {
752         const affinity_node_t *n1 = k1;
753         const affinity_node_t *n2 = k2;
754         (void) size;
755
756         return (n1->irn != n2->irn);
757 }
758
759 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
760 {
761         affinity_node_t new_node, *node;
762         neighb_t        *nbr;
763         int             allocnew = 1;
764
765         new_node.irn        = n1;
766         new_node.degree     = 0;
767         new_node.neighbours = NULL;
768         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
769
770         for (nbr = node->neighbours; nbr; nbr = nbr->next)
771                 if (nbr->irn == n2) {
772                         allocnew = 0;
773                         break;
774                 }
775
776         /* if we did not find n2 in n1's neighbourhood insert it */
777         if (allocnew) {
778                 nbr        = OALLOC(&co->obst, neighb_t);
779                 nbr->irn   = n2;
780                 nbr->costs = 0;
781                 nbr->next  = node->neighbours;
782
783                 node->neighbours = nbr;
784                 node->degree++;
785         }
786
787         /* now nbr points to n1's neighbour-entry of n2 */
788         nbr->costs += costs;
789 }
790
791 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
792 {
793         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
794                 add_edge(co, n1, n2, costs);
795                 add_edge(co, n2, n1, costs);
796         }
797 }
798
799 static void build_graph_walker(ir_node *irn, void *env)
800 {
801         const arch_register_req_t *req = arch_get_register_req_out(irn);
802         copy_opt_t                *co  = env;
803         int pos, max;
804         const arch_register_t *reg;
805
806         if (req->cls != co->cls || arch_irn_is_ignore(irn))
807                 return;
808
809         reg = arch_get_irn_register(irn);
810         if (arch_register_type_is(reg, ignore))
811                 return;
812
813         if (is_Reg_Phi(irn)) { /* Phis */
814                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
815                         ir_node *arg = get_irn_n(irn, pos);
816                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
817                 }
818         } else if (is_Perm_Proj(irn)) { /* Perms */
819                 ir_node *arg = get_Perm_src(irn);
820                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
821         } else { /* 2-address code */
822                 if (is_2addr_code(req)) {
823                         const unsigned other = req->other_same;
824                         int i;
825
826                         for (i = 0; 1U << i <= other; ++i) {
827                                 if (other & (1U << i)) {
828                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
829                                         if (!arch_irn_is_ignore(other))
830                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
831                                 }
832                         }
833                 }
834         }
835 }
836
837 void co_build_graph_structure(copy_opt_t *co)
838 {
839         obstack_init(&co->obst);
840         co->nodes = new_set(compare_affinity_node_t, 32);
841
842         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
843 }
844
845 void co_free_graph_structure(copy_opt_t *co)
846 {
847         ASSERT_GS_AVAIL(co);
848
849         del_set(co->nodes);
850         obstack_free(&co->obst, NULL);
851         co->nodes = NULL;
852 }
853
854 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
855
856 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
857 {
858         affinity_node_t new_node, *n;
859
860         ASSERT_GS_AVAIL(co);
861
862         new_node.irn = irn;
863         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
864         if (n) {
865                 return (n->degree > 0);
866         } else
867                 return 0;
868 }
869
870 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
871 {
872         ir_node *nodes[]  = { a, b };
873         bitset_t *constr[] = { NULL, NULL };
874         int j;
875
876         constr[0] = bitset_alloca(co->cls->n_regs);
877         constr[1] = bitset_alloca(co->cls->n_regs);
878
879         for (j = 0; j < 2; ++j) {
880                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
881                 if (arch_register_req_is(req, limited))
882                         rbitset_copy_to_bitset(req->limited, constr[j]);
883                 else
884                         bitset_set_all(constr[j]);
885
886         }
887
888         return !bitset_intersect(constr[0], constr[1]);
889 }
890
891 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
892 {
893         be_ifg_t *ifg       = co->cenv->ifg;
894         int      *color_map = ALLOCAN(int, co->cls->n_regs);
895         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
896
897         ir_node *irn;
898         void *it, *nit;
899         int n, n_regs;
900         unsigned i;
901
902         n_regs = 0;
903         for (i = 0; i < co->cls->n_regs; ++i) {
904                 const arch_register_t *reg = &co->cls->regs[i];
905                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
906         }
907
908         /*
909          * n contains the first node number.
910          * the values below n are the pre-colored register nodes
911          */
912
913         it  = be_ifg_nodes_iter_alloca(ifg);
914         nit = be_ifg_neighbours_iter_alloca(ifg);
915
916         n = n_regs;
917         be_ifg_foreach_node(ifg, it, irn) {
918                 if (arch_irn_is_ignore(irn))
919                         continue;
920                 node_map[get_irn_idx(irn)] = n++;
921         }
922
923         fprintf(f, "%d %d\n", n, n_regs);
924
925         be_ifg_foreach_node(ifg, it, irn) {
926                 if (!arch_irn_is_ignore(irn)) {
927                         int idx                        = node_map[get_irn_idx(irn)];
928                         affinity_node_t           *a   = get_affinity_info(co, irn);
929                         const arch_register_req_t *req = arch_get_register_req_out(irn);
930                         ir_node                   *adj;
931
932                         if (arch_register_req_is(req, limited)) {
933                                 for (i = 0; i < co->cls->n_regs; ++i) {
934                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
935                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
936                                 }
937                         }
938
939                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
940                                 if (!arch_irn_is_ignore(adj) &&
941                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
942                                         int adj_idx = node_map[get_irn_idx(adj)];
943                                         if (idx < adj_idx)
944                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
945                                 }
946                         }
947
948                         if (a) {
949                                 neighb_t *n;
950
951                                 co_gs_foreach_neighb(a, n) {
952                                         if (!arch_irn_is_ignore(n->irn)) {
953                                                 int n_idx = node_map[get_irn_idx(n->irn)];
954                                                 if (idx < n_idx)
955                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
956                                         }
957                                 }
958                         }
959                 }
960         }
961
962         xfree(node_map);
963 }
964
965 /*
966          ___ _____ ____   ____   ___ _____   ____                        _
967         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
968          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
969          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
970         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
971                                                                   |_|            |___/
972 */
973
974 static const char *get_dot_color_name(size_t col)
975 {
976         static const char *names[] = {
977                 "blue",
978                 "red",
979                 "green",
980                 "yellow",
981                 "cyan",
982                 "magenta",
983                 "orange",
984                 "chocolate",
985                 "beige",
986                 "navy",
987                 "darkgreen",
988                 "darkred",
989                 "lightPink",
990                 "chartreuse",
991                 "lightskyblue",
992                 "linen",
993                 "pink",
994                 "lightslateblue",
995                 "mintcream",
996                 "red",
997                 "darkolivegreen",
998                 "mediumblue",
999                 "mistyrose",
1000                 "salmon",
1001                 "darkseagreen",
1002                 "mediumslateblue"
1003                 "moccasin",
1004                 "tomato",
1005                 "forestgreen",
1006                 "darkturquoise",
1007                 "palevioletred"
1008         };
1009
1010         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1011 }
1012
1013 typedef struct _co_ifg_dump_t {
1014         const copy_opt_t *co;
1015         unsigned flags;
1016 } co_ifg_dump_t;
1017
1018 static void ifg_dump_graph_attr(FILE *f, void *self)
1019 {
1020         (void) self;
1021         fprintf(f, "overlap=scale");
1022 }
1023
1024 static int ifg_is_dump_node(void *self, ir_node *irn)
1025 {
1026         (void)self;
1027         return !arch_irn_is_ignore(irn);
1028 }
1029
1030 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1031 {
1032         co_ifg_dump_t             *env     = self;
1033         const arch_register_t     *reg     = arch_get_irn_register(irn);
1034         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1035         int                        limited = arch_register_req_is(req, limited);
1036
1037         if (env->flags & CO_IFG_DUMP_LABELS) {
1038                 ir_fprintf(f, "label=\"%+F", irn);
1039
1040                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1041                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1042                         rbitset_copy_to_bitset(req->limited, bs);
1043                         ir_fprintf(f, "\\n%B", bs);
1044                 }
1045                 ir_fprintf(f, "\" ");
1046         } else {
1047                 fprintf(f, "label=\"\" shape=point " );
1048         }
1049
1050         if (env->flags & CO_IFG_DUMP_SHAPE)
1051                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1052
1053         if (env->flags & CO_IFG_DUMP_COLORS)
1054                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1055 }
1056
1057 static void ifg_dump_at_end(FILE *file, void *self)
1058 {
1059         co_ifg_dump_t *env = self;
1060         affinity_node_t *a;
1061
1062         co_gs_foreach_aff_node(env->co, a) {
1063                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1064                 unsigned aidx = get_irn_idx(a->irn);
1065                 neighb_t *n;
1066
1067                 co_gs_foreach_neighb(a, n) {
1068                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1069                         unsigned nidx = get_irn_idx(n->irn);
1070
1071                         if (aidx < nidx) {
1072                                 const char *color = nr == ar ? "blue" : "red";
1073                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1074                                 if (env->flags & CO_IFG_DUMP_LABELS)
1075                                         fprintf(file, "label=\"%d\" ", n->costs);
1076                                 if (env->flags & CO_IFG_DUMP_COLORS)
1077                                         fprintf(file, "color=%s ", color);
1078                                 else
1079                                         fprintf(file, "style=dotted");
1080                                 fprintf(file, "];\n");
1081                         }
1082                 }
1083         }
1084 }
1085
1086
1087 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1088         ifg_is_dump_node,
1089         ifg_dump_graph_attr,
1090         ifg_dump_node_attr,
1091         NULL,
1092         NULL,
1093         ifg_dump_at_end
1094 };
1095
1096
1097
1098 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1099 {
1100         co_ifg_dump_t cod;
1101
1102         cod.co    = co;
1103         cod.flags = flags;
1104         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1105 }
1106
1107
1108 void co_solve_park_moon(copy_opt_t *opt)
1109 {
1110         (void) opt;
1111 }
1112
1113 /*
1114     __  __       _         ____       _
1115    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1116    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1117    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1118    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1119
1120 */
1121
1122 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1123 {
1124         FILE *result;
1125         char buf[1024];
1126         size_t i, n;
1127         char *tu_name;
1128
1129         n = strlen(env->birg->main_env->cup_name);
1130         tu_name = XMALLOCN(char, n + 1);
1131         strcpy(tu_name, env->birg->main_env->cup_name);
1132         for (i = 0; i < n; ++i)
1133                 if (tu_name[i] == '.')
1134                         tu_name[i] = '_';
1135
1136
1137         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1138         xfree(tu_name);
1139         result = fopen(buf, "wt");
1140         if (result == NULL) {
1141                 panic("Couldn't open '%s' for writing.", buf);
1142         }
1143
1144         return result;
1145 }
1146
1147 void co_driver(be_chordal_env_t *cenv)
1148 {
1149         ir_timer_t          *timer = ir_timer_new();
1150         co_complete_stats_t before, after;
1151         copy_opt_t          *co;
1152         int                 was_optimal = 0;
1153
1154         assert(selected_copyopt);
1155
1156         /* skip copymin if algo is 'none' */
1157         if (selected_copyopt->copyopt == void_algo)
1158                 return;
1159
1160         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1161
1162         co = new_copy_opt(cenv, cost_func);
1163         co_build_ou_structure(co);
1164         co_build_graph_structure(co);
1165
1166         co_complete_stats(co, &before);
1167
1168         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1169         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1170         be_stat_ev_ull("co_max_costs",    before.max_costs);
1171         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1172         be_stat_ev_ull("co_aff_int",      before.aff_int);
1173
1174         be_stat_ev_ull("co_init_costs",   before.costs);
1175         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1176
1177         if (dump_flags & DUMP_BEFORE) {
1178                 FILE *f = my_open(cenv, "", "-before.dot");
1179                 co_dump_ifg_dot(co, f, style_flags);
1180                 fclose(f);
1181         }
1182
1183         /* if the algo can improve results, provide an initial solution with heur1 */
1184         if (improve && selected_copyopt->can_improve_existing) {
1185                 co_complete_stats_t stats;
1186
1187                 /* produce a heuristic solution */
1188                 co_solve_heuristic(co);
1189
1190                 /* do the stats and provide the current costs */
1191                 co_complete_stats(co, &stats);
1192                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1193         }
1194
1195         /* perform actual copy minimization */
1196         ir_timer_reset_and_start(timer);
1197         was_optimal = selected_copyopt->copyopt(co);
1198         ir_timer_stop(timer);
1199
1200         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1201         be_stat_ev_ull("co_optimal", was_optimal);
1202         ir_timer_free(timer);
1203
1204         if (dump_flags & DUMP_AFTER) {
1205                 FILE *f = my_open(cenv, "", "-after.dot");
1206                 co_dump_ifg_dot(co, f, style_flags);
1207                 fclose(f);
1208         }
1209
1210         co_complete_stats(co, &after);
1211
1212         if (do_stats) {
1213                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1214                 ulong64 evitable          = after.costs     - after.inevit_costs;
1215
1216                 ir_printf("%30F ", cenv->irg);
1217                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1218
1219                 if (optimizable_costs > 0)
1220                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1221                 else
1222                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1223         }
1224
1225         /* Dump the interference graph in Appel's format. */
1226         if (dump_flags & DUMP_APPEL) {
1227                 FILE *f = my_open(cenv, "", ".apl");
1228                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1229                 co_dump_appel_graph(co, f);
1230                 fclose(f);
1231         }
1232
1233         be_stat_ev_ull("co_after_costs", after.costs);
1234         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1235
1236         co_free_graph_structure(co);
1237         co_free_ou_structure(co);
1238         free_copy_opt(co);
1239 }