b92b32deb0dbec4e3173c7a6ccd00a6cd4bcc5b0
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode.h"
52 #include "beutil.h"
53 #include "beifg.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static int        improve     = 1;
81
82 static const lc_opt_enum_mask_items_t dump_items[] = {
83         { "before",  DUMP_BEFORE },
84         { "after",   DUMP_AFTER  },
85         { "appel",   DUMP_APPEL  },
86         { "all",     DUMP_ALL    },
87         { NULL,      0 }
88 };
89
90 static const lc_opt_enum_mask_items_t style_items[] = {
91         { "color",   CO_IFG_DUMP_COLORS },
92         { "labels",  CO_IFG_DUMP_LABELS },
93         { "constr",  CO_IFG_DUMP_CONSTR },
94         { "shape",   CO_IFG_DUMP_SHAPE  },
95         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
96         { NULL,      0 }
97 };
98
99 typedef int (*opt_funcptr)(void);
100
101 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
102         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
103         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
104         { "one",    (opt_funcptr) co_get_costs_all_one },
105         { NULL,     NULL }
106 };
107
108 static lc_opt_enum_mask_var_t dump_var = {
109         &dump_flags, dump_items
110 };
111
112 static lc_opt_enum_mask_var_t style_var = {
113         &style_flags, style_items
114 };
115
116 static lc_opt_enum_func_ptr_var_t cost_func_var = {
117         (opt_funcptr*) &cost_func, cost_func_items
118 };
119
120 static const lc_opt_table_entry_t options[] = {
121         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
122         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
123         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
124         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
125         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
126         LC_OPT_LAST
127 };
128
129 static be_module_list_entry_t *copyopts = NULL;
130 static const co_algo_info *selected_copyopt = NULL;
131
132 void be_register_copyopt(const char *name, co_algo_info *copyopt)
133 {
134         if (selected_copyopt == NULL)
135                 selected_copyopt = copyopt;
136         be_add_module_to_list(&copyopts, name, copyopt);
137 }
138
139 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt);
140 void be_init_copyopt(void)
141 {
142         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
143         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
144         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
145         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
146
147         lc_opt_add_table(co_grp, options);
148         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
149                                        &copyopts, (void**) &selected_copyopt);
150 }
151
152 static int void_algo(copy_opt_t *co)
153 {
154         (void) co;
155         return 0;
156 }
157
158 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone);
159 void be_init_copynone(void)
160 {
161         static co_algo_info copyheur = {
162                 void_algo, 0
163         };
164
165         be_register_copyopt("none", &copyheur);
166 }
167
168 #undef QUICK_AND_DIRTY_HACK
169
170 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
171 {
172         if (env->ifg)
173                 return be_ifg_connected(env->ifg, a, b);
174         else
175                 return be_values_interfere(env->birg->lv, a, b);
176 }
177
178
179 /******************************************************************************
180     _____                           _
181    / ____|                         | |
182   | |  __  ___ _ __   ___ _ __ __ _| |
183   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
184   | |__| |  __/ | | |  __/ | | (_| | |
185    \_____|\___|_| |_|\___|_|  \__,_|_|
186
187  ******************************************************************************/
188
189 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
190
191
192 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
193 {
194         const char *s1, *s2, *s3;
195         int len;
196         copy_opt_t *co;
197
198         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
199
200         co = XMALLOCZ(copy_opt_t);
201         co->cenv      = chordal_env;
202         co->irg       = chordal_env->irg;
203         co->cls       = chordal_env->cls;
204         co->get_costs = get_costs;
205
206         s1 = get_irp_name();
207         s2 = get_entity_name(get_irg_entity(co->irg));
208         s3 = chordal_env->cls->name;
209         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
210         co->name = XMALLOCN(char, len);
211         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
212
213         return co;
214 }
215
216 void free_copy_opt(copy_opt_t *co)
217 {
218         xfree(co->name);
219         free(co);
220 }
221
222 /**
223  * Checks if a node is optimizable, viz. has something to do with coalescing
224  * @param irn  The irn to check
225  */
226 static int co_is_optimizable_root(ir_node *irn)
227 {
228         const arch_register_req_t *req;
229         const arch_register_t     *reg;
230
231         if (arch_irn_is_ignore(irn))
232                 return 0;
233
234         reg = arch_get_irn_register(irn);
235         if (arch_register_type_is(reg, ignore))
236                 return 0;
237
238         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
239                 return 1;
240
241         req = arch_get_register_req_out(irn);
242         if (is_2addr_code(req))
243                 return 1;
244
245         return 0;
246 }
247
248 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
249 {
250         int cost = 0;
251         ir_loop *loop;
252         ir_node *root_block = get_nodes_block(root);
253         (void) co;
254         (void) arg;
255
256         if (is_Phi(root)) {
257                 /* for phis the copies are placed in the corresponding pred-block */
258                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
259         } else {
260                 /* a perm places the copy in the same block as it resides */
261                 loop = get_irn_loop(root_block);
262         }
263         if (loop) {
264                 int d = get_loop_depth(loop);
265                 cost = d*d;
266         }
267         return 1+cost;
268 }
269
270 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
271 {
272         int res;
273         ir_node *root_bl = get_nodes_block(root);
274         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
275         (void) arg;
276         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
277
278         /* don't allow values smaller than one. */
279         return res < 1 ? 1 : res;
280 }
281
282
283 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos)
284 {
285         (void) co;
286         (void) root;
287         (void) arg;
288         (void) pos;
289         return 1;
290 }
291
292 /******************************************************************************
293    ____        _   _    _       _ _          _____ _
294   / __ \      | | | |  | |     (_) |        / ____| |
295  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
296  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
297  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
298   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
299         | |                                                            __/ |
300         |_|                                                           |___/
301  ******************************************************************************/
302
303 /**
304  * Determines a maximum weighted independent set with respect to
305  * the interference and conflict edges of all nodes in a qnode.
306  */
307 static int ou_max_ind_set_costs(unit_t *ou)
308 {
309         be_chordal_env_t *chordal_env = ou->co->cenv;
310         ir_node **safe, **unsafe;
311         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
312         bitset_t *curr;
313         unsigned  pos;
314         int curr_weight, best_weight = 0;
315
316         /* assign the nodes into two groups.
317          * safe: node has no interference, hence it is in every max stable set.
318          * unsafe: node has an interference
319          */
320         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
321         safe_costs   = 0;
322         safe_count   = 0;
323         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
324         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
325         unsafe_count = 0;
326         for (i=1; i<ou->node_count; ++i) {
327                 int is_safe = 1;
328                 for (o=1; o<ou->node_count; ++o) {
329                         if (i==o)
330                                 continue;
331                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
332                                 unsafe_costs[unsafe_count] = ou->costs[i];
333                                 unsafe[unsafe_count] = ou->nodes[i];
334                                 ++unsafe_count;
335                                 is_safe = 0;
336                                 break;
337                         }
338                 }
339                 if (is_safe) {
340                         safe_costs += ou->costs[i];
341                         safe[safe_count++] = ou->nodes[i];
342                 }
343         }
344
345
346         /* now compute the best set out of the unsafe nodes*/
347         if (unsafe_count > MIS_HEUR_TRIGGER) {
348                 bitset_t *best = bitset_alloca(unsafe_count);
349                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
350                 for (i=0; i<unsafe_count; ++i) {
351                         bitset_set(best, i);
352                         /* check if it is a stable set */
353                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
354                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
355                                         bitset_clear(best, i); /* clear the bit and try next one */
356                                         break;
357                                 }
358                 }
359                 /* compute the weight */
360                 bitset_foreach(best, pos)
361                         best_weight += unsafe_costs[pos];
362         } else {
363                 /* Exact Algorithm: Brute force */
364                 curr = bitset_alloca(unsafe_count);
365                 bitset_set_all(curr);
366                 while (bitset_popcount(curr) != 0) {
367                         /* check if curr is a stable set */
368                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
369                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
370                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
371                                                         goto no_stable_set;
372
373                         /* if we arrive here, we have a stable set */
374                         /* compute the weigth of the stable set*/
375                         curr_weight = 0;
376                         bitset_foreach(curr, pos)
377                                 curr_weight += unsafe_costs[pos];
378
379                         /* any better ? */
380                         if (curr_weight > best_weight) {
381                                 best_weight = curr_weight;
382                         }
383
384         no_stable_set:
385                         bitset_minus1(curr);
386                 }
387         }
388
389         return safe_costs+best_weight;
390 }
391
392 static void co_collect_units(ir_node *irn, void *env)
393 {
394         const arch_register_req_t *req;
395         copy_opt_t                *co  = env;
396         unit_t *unit;
397
398         if (get_irn_mode(irn) == mode_T)
399                 return;
400         req = arch_get_register_req_out(irn);
401         if (req->cls != co->cls)
402                 return;
403         if (!co_is_optimizable_root(irn))
404                 return;
405
406         /* Init a new unit */
407         unit = XMALLOCZ(unit_t);
408         unit->co = co;
409         unit->node_count = 1;
410         INIT_LIST_HEAD(&unit->queue);
411
412         /* Phi with some/all of its arguments */
413         if (is_Reg_Phi(irn)) {
414                 int i, arity;
415
416                 /* init */
417                 arity = get_irn_arity(irn);
418                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
419                 unit->costs = XMALLOCN(int,      arity + 1);
420                 unit->nodes[0] = irn;
421
422                 /* fill */
423                 for (i=0; i<arity; ++i) {
424                         int o, arg_pos;
425                         ir_node *arg = get_irn_n(irn, i);
426
427                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
428                         if (arg == irn)
429                                 continue;
430                         if (nodes_interfere(co->cenv, irn, arg)) {
431                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
432                                 continue;
433                         }
434
435                         /* Else insert the argument of the phi to the members of this ou */
436                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
437
438                         if (arch_irn_is_ignore(arg))
439                                 continue;
440
441                         /* Check if arg has occurred at a prior position in the arg/list */
442                         arg_pos = 0;
443                         for (o=1; o<unit->node_count; ++o) {
444                                 if (unit->nodes[o] == arg) {
445                                         arg_pos = o;
446                                         break;
447                                 }
448                         }
449
450                         if (!arg_pos) { /* a new argument */
451                                 /* insert node, set costs */
452                                 unit->nodes[unit->node_count] = arg;
453                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
454                                 unit->node_count++;
455                         } else { /* arg has occurred before in same phi */
456                                 /* increase costs for existing arg */
457                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
458                         }
459                 }
460                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
461                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
462         } else if (is_Perm_Proj(irn)) {
463                 /* Proj of a perm with corresponding arg */
464                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
465                 unit->nodes = XMALLOCN(ir_node*, 2);
466                 unit->costs = XMALLOCN(int,      2);
467                 unit->node_count = 2;
468                 unit->nodes[0] = irn;
469                 unit->nodes[1] = get_Perm_src(irn);
470                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
471         } else {
472                 /* Src == Tgt of a 2-addr-code instruction */
473                 if (is_2addr_code(req)) {
474                         const unsigned other = req->other_same;
475                         int            count = 0;
476                         int            i;
477
478                         for (i = 0; (1U << i) <= other; ++i) {
479                                 if (other & (1U << i)) {
480                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
481                                         if (arch_irn_is_ignore(o))
482                                                 continue;
483                                         if (nodes_interfere(co->cenv, irn, o))
484                                                 continue;
485                                         ++count;
486                                 }
487                         }
488
489                         if (count != 0) {
490                                 int k = 0;
491                                 ++count;
492                                 unit->nodes = XMALLOCN(ir_node*, count);
493                                 unit->costs = XMALLOCN(int,      count);
494                                 unit->node_count = count;
495                                 unit->nodes[k++] = irn;
496
497                                 for (i = 0; 1U << i <= other; ++i) {
498                                         if (other & (1U << i)) {
499                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
500                                                 if (!arch_irn_is_ignore(o) &&
501                                                                 !nodes_interfere(co->cenv, irn, o)) {
502                                                         unit->nodes[k] = o;
503                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
504                                                         ++k;
505                                                 }
506                                         }
507                                 }
508                         }
509                 } else {
510                         assert(0 && "This is not an optimizable node!");
511                 }
512         }
513
514         /* Insert the new unit at a position according to its costs */
515         if (unit->node_count > 1) {
516                 int i;
517                 struct list_head *tmp;
518
519                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
520                 for (i=1; i<unit->node_count; ++i) {
521                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
522                         unit->all_nodes_costs += unit->costs[i];
523                 }
524
525                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
526                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
527                 /* Insert the new ou according to its sort_key */
528                 tmp = &co->units;
529                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
530                         tmp = tmp->next;
531                 list_add(&unit->units, tmp);
532         } else {
533                 free(unit);
534         }
535 }
536
537 #ifdef QUICK_AND_DIRTY_HACK
538
539 static int compare_ous(const void *k1, const void *k2)
540 {
541         const unit_t *u1 = *((const unit_t **) k1);
542         const unit_t *u2 = *((const unit_t **) k2);
543         int i, o, u1_has_constr, u2_has_constr;
544         arch_register_req_t req;
545
546         /* Units with constraints come first */
547         u1_has_constr = 0;
548         for (i=0; i<u1->node_count; ++i) {
549                 arch_get_register_req_out(&req, u1->nodes[i]);
550                 if (arch_register_req_is(&req, limited)) {
551                         u1_has_constr = 1;
552                         break;
553                 }
554         }
555
556         u2_has_constr = 0;
557         for (i=0; i<u2->node_count; ++i) {
558                 arch_get_register_req_out(&req, u2->nodes[i]);
559                 if (arch_register_req_is(&req, limited)) {
560                         u2_has_constr = 1;
561                         break;
562                 }
563         }
564
565         if (u1_has_constr != u2_has_constr)
566                 return u2_has_constr - u1_has_constr;
567
568         /* Now check, whether the two units are connected */
569 #if 0
570         for (i=0; i<u1->node_count; ++i)
571                 for (o=0; o<u2->node_count; ++o)
572                         if (u1->nodes[i] == u2->nodes[o])
573                                 return 0;
574 #endif
575
576         /* After all, the sort key decides. Greater keys come first. */
577         return u2->sort_key - u1->sort_key;
578
579 }
580
581 /**
582  * Sort the ou's according to constraints and their sort_key
583  */
584 static void co_sort_units(copy_opt_t *co)
585 {
586         int i, count = 0, costs;
587         unit_t *ou, **ous;
588
589         /* get the number of ous, remove them form the list and fill the array */
590         list_for_each_entry(unit_t, ou, &co->units, units)
591                 count++;
592         ous = ALLOCAN(unit_t, count);
593
594         costs = co_get_max_copy_costs(co);
595
596         i = 0;
597         list_for_each_entry(unit_t, ou, &co->units, units)
598                 ous[i++] = ou;
599
600         INIT_LIST_HEAD(&co->units);
601
602         assert(count == i && list_empty(&co->units));
603
604         for (i=0; i<count; ++i)
605                 ir_printf("%+F\n", ous[i]->nodes[0]);
606
607         qsort(ous, count, sizeof(*ous), compare_ous);
608
609         ir_printf("\n\n");
610         for (i=0; i<count; ++i)
611                 ir_printf("%+F\n", ous[i]->nodes[0]);
612
613         /* reinsert into list in correct order */
614         for (i=0; i<count; ++i)
615                 list_add_tail(&ous[i]->units, &co->units);
616
617         assert(costs == co_get_max_copy_costs(co));
618 }
619 #endif
620
621 void co_build_ou_structure(copy_opt_t *co)
622 {
623         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
624         INIT_LIST_HEAD(&co->units);
625         irg_walk_graph(co->irg, co_collect_units, NULL, co);
626 #ifdef QUICK_AND_DIRTY_HACK
627         co_sort_units(co);
628 #endif
629 }
630
631 void co_free_ou_structure(copy_opt_t *co)
632 {
633         unit_t *curr, *tmp;
634         ASSERT_OU_AVAIL(co);
635         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
636                 xfree(curr->nodes);
637                 xfree(curr->costs);
638                 xfree(curr);
639         }
640         co->units.next = NULL;
641 }
642
643 /* co_solve_heuristic() is implemented in becopyheur.c */
644
645 int co_get_max_copy_costs(const copy_opt_t *co)
646 {
647         int i, res = 0;
648         unit_t *curr;
649
650         ASSERT_OU_AVAIL(co);
651
652         list_for_each_entry(unit_t, curr, &co->units, units) {
653                 res += curr->inevitable_costs;
654                 for (i=1; i<curr->node_count; ++i)
655                         res += curr->costs[i];
656         }
657         return res;
658 }
659
660 int co_get_inevit_copy_costs(const copy_opt_t *co)
661 {
662         int res = 0;
663         unit_t *curr;
664
665         ASSERT_OU_AVAIL(co);
666
667         list_for_each_entry(unit_t, curr, &co->units, units)
668                 res += curr->inevitable_costs;
669         return res;
670 }
671
672 int co_get_copy_costs(const copy_opt_t *co)
673 {
674         int i, res = 0;
675         unit_t *curr;
676
677         ASSERT_OU_AVAIL(co);
678
679         list_for_each_entry(unit_t, curr, &co->units, units) {
680                 int root_col = get_irn_col(curr->nodes[0]);
681                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
682                 res += curr->inevitable_costs;
683                 for (i=1; i<curr->node_count; ++i) {
684                         int arg_col = get_irn_col(curr->nodes[i]);
685                         if (root_col != arg_col) {
686                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
687                                 res += curr->costs[i];
688                         }
689                 }
690         }
691         return res;
692 }
693
694 int co_get_lower_bound(const copy_opt_t *co)
695 {
696         int res = 0;
697         unit_t *curr;
698
699         ASSERT_OU_AVAIL(co);
700
701         list_for_each_entry(unit_t, curr, &co->units, units)
702                 res += curr->inevitable_costs + curr->min_nodes_costs;
703         return res;
704 }
705
706 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
707 {
708         bitset_t *seen = bitset_irg_malloc(co->irg);
709         affinity_node_t *an;
710
711         memset(stat, 0, sizeof(stat[0]));
712
713         /* count affinity edges. */
714         co_gs_foreach_aff_node(co, an) {
715                 neighb_t *neigh;
716                 stat->aff_nodes += 1;
717                 bitset_add_irn(seen, an->irn);
718                 co_gs_foreach_neighb(an, neigh) {
719                         if (!bitset_contains_irn(seen, neigh->irn)) {
720                                 stat->aff_edges += 1;
721                                 stat->max_costs += neigh->costs;
722
723                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
724                                         stat->costs += neigh->costs;
725                                         stat->unsatisfied_edges += 1;
726                                 }
727
728                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
729                                         stat->aff_int += 1;
730                                         stat->inevit_costs += neigh->costs;
731                                 }
732
733                         }
734                 }
735         }
736
737         bitset_free(seen);
738 }
739
740 /******************************************************************************
741    _____                 _        _____ _
742   / ____|               | |      / ____| |
743  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
744  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
745  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
746   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
747                   | |                                       __/ |
748                   |_|                                      |___/
749  ******************************************************************************/
750
751 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
752 {
753         const affinity_node_t *n1 = k1;
754         const affinity_node_t *n2 = k2;
755         (void) size;
756
757         return (n1->irn != n2->irn);
758 }
759
760 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
761 {
762         affinity_node_t new_node, *node;
763         neighb_t        *nbr;
764         int             allocnew = 1;
765
766         new_node.irn        = n1;
767         new_node.degree     = 0;
768         new_node.neighbours = NULL;
769         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
770
771         for (nbr = node->neighbours; nbr; nbr = nbr->next)
772                 if (nbr->irn == n2) {
773                         allocnew = 0;
774                         break;
775                 }
776
777         /* if we did not find n2 in n1's neighbourhood insert it */
778         if (allocnew) {
779                 nbr        = OALLOC(&co->obst, neighb_t);
780                 nbr->irn   = n2;
781                 nbr->costs = 0;
782                 nbr->next  = node->neighbours;
783
784                 node->neighbours = nbr;
785                 node->degree++;
786         }
787
788         /* now nbr points to n1's neighbour-entry of n2 */
789         nbr->costs += costs;
790 }
791
792 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
793 {
794         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
795                 add_edge(co, n1, n2, costs);
796                 add_edge(co, n2, n1, costs);
797         }
798 }
799
800 static void build_graph_walker(ir_node *irn, void *env)
801 {
802         const arch_register_req_t *req;
803         copy_opt_t                *co  = env;
804         int pos, max;
805         const arch_register_t *reg;
806
807         if (get_irn_mode(irn) == mode_T)
808                 return;
809         req = arch_get_register_req_out(irn);
810         if (req->cls != co->cls || arch_irn_is_ignore(irn))
811                 return;
812
813         reg = arch_get_irn_register(irn);
814         if (arch_register_type_is(reg, ignore))
815                 return;
816
817         if (is_Reg_Phi(irn)) { /* Phis */
818                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
819                         ir_node *arg = get_irn_n(irn, pos);
820                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
821                 }
822         } else if (is_Perm_Proj(irn)) { /* Perms */
823                 ir_node *arg = get_Perm_src(irn);
824                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
825         } else { /* 2-address code */
826                 if (is_2addr_code(req)) {
827                         const unsigned other = req->other_same;
828                         int i;
829
830                         for (i = 0; 1U << i <= other; ++i) {
831                                 if (other & (1U << i)) {
832                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
833                                         if (!arch_irn_is_ignore(other))
834                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
835                                 }
836                         }
837                 }
838         }
839 }
840
841 void co_build_graph_structure(copy_opt_t *co)
842 {
843         obstack_init(&co->obst);
844         co->nodes = new_set(compare_affinity_node_t, 32);
845
846         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
847 }
848
849 void co_free_graph_structure(copy_opt_t *co)
850 {
851         ASSERT_GS_AVAIL(co);
852
853         del_set(co->nodes);
854         obstack_free(&co->obst, NULL);
855         co->nodes = NULL;
856 }
857
858 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
859
860 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
861 {
862         affinity_node_t new_node, *n;
863
864         ASSERT_GS_AVAIL(co);
865
866         new_node.irn = irn;
867         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
868         if (n) {
869                 return (n->degree > 0);
870         } else
871                 return 0;
872 }
873
874 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
875 {
876         ir_node *nodes[]  = { a, b };
877         bitset_t *constr[] = { NULL, NULL };
878         int j;
879
880         constr[0] = bitset_alloca(co->cls->n_regs);
881         constr[1] = bitset_alloca(co->cls->n_regs);
882
883         for (j = 0; j < 2; ++j) {
884                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
885                 if (arch_register_req_is(req, limited))
886                         rbitset_copy_to_bitset(req->limited, constr[j]);
887                 else
888                         bitset_set_all(constr[j]);
889
890         }
891
892         return !bitset_intersect(constr[0], constr[1]);
893 }
894
895 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
896 {
897         be_ifg_t *ifg       = co->cenv->ifg;
898         int      *color_map = ALLOCAN(int, co->cls->n_regs);
899         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
900
901         ir_node *irn;
902         nodes_iter_t it;
903         neighbours_iter_t nit;
904         int n, n_regs;
905         unsigned i;
906
907         n_regs = 0;
908         for (i = 0; i < co->cls->n_regs; ++i) {
909                 const arch_register_t *reg = &co->cls->regs[i];
910                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
911         }
912
913         /*
914          * n contains the first node number.
915          * the values below n are the pre-colored register nodes
916          */
917
918         n = n_regs;
919         be_ifg_foreach_node(ifg, &it, irn) {
920                 if (arch_irn_is_ignore(irn))
921                         continue;
922                 node_map[get_irn_idx(irn)] = n++;
923         }
924
925         fprintf(f, "%d %d\n", n, n_regs);
926
927         be_ifg_foreach_node(ifg, &it, irn) {
928                 if (!arch_irn_is_ignore(irn)) {
929                         int idx                        = node_map[get_irn_idx(irn)];
930                         affinity_node_t           *a   = get_affinity_info(co, irn);
931                         const arch_register_req_t *req = arch_get_register_req_out(irn);
932                         ir_node                   *adj;
933
934                         if (arch_register_req_is(req, limited)) {
935                                 for (i = 0; i < co->cls->n_regs; ++i) {
936                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
937                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
938                                 }
939                         }
940
941                         be_ifg_foreach_neighbour(ifg, &nit, irn, adj) {
942                                 if (!arch_irn_is_ignore(adj) &&
943                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
944                                         int adj_idx = node_map[get_irn_idx(adj)];
945                                         if (idx < adj_idx)
946                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
947                                 }
948                         }
949
950                         if (a) {
951                                 neighb_t *n;
952
953                                 co_gs_foreach_neighb(a, n) {
954                                         if (!arch_irn_is_ignore(n->irn)) {
955                                                 int n_idx = node_map[get_irn_idx(n->irn)];
956                                                 if (idx < n_idx)
957                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
958                                         }
959                                 }
960                         }
961                 }
962         }
963
964         xfree(node_map);
965 }
966
967 /*
968          ___ _____ ____   ____   ___ _____   ____                        _
969         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
970          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
971          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
972         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
973                                                                   |_|            |___/
974 */
975
976 static const char *get_dot_color_name(size_t col)
977 {
978         static const char *names[] = {
979                 "blue",
980                 "red",
981                 "green",
982                 "yellow",
983                 "cyan",
984                 "magenta",
985                 "orange",
986                 "chocolate",
987                 "beige",
988                 "navy",
989                 "darkgreen",
990                 "darkred",
991                 "lightPink",
992                 "chartreuse",
993                 "lightskyblue",
994                 "linen",
995                 "pink",
996                 "lightslateblue",
997                 "mintcream",
998                 "red",
999                 "darkolivegreen",
1000                 "mediumblue",
1001                 "mistyrose",
1002                 "salmon",
1003                 "darkseagreen",
1004                 "mediumslateblue"
1005                 "moccasin",
1006                 "tomato",
1007                 "forestgreen",
1008                 "darkturquoise",
1009                 "palevioletred"
1010         };
1011
1012         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1013 }
1014
1015 typedef struct _co_ifg_dump_t {
1016         const copy_opt_t *co;
1017         unsigned flags;
1018 } co_ifg_dump_t;
1019
1020 static void ifg_dump_graph_attr(FILE *f, void *self)
1021 {
1022         (void) self;
1023         fprintf(f, "overlap=scale");
1024 }
1025
1026 static int ifg_is_dump_node(void *self, ir_node *irn)
1027 {
1028         (void)self;
1029         return !arch_irn_is_ignore(irn);
1030 }
1031
1032 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1033 {
1034         co_ifg_dump_t             *env     = self;
1035         const arch_register_t     *reg     = arch_get_irn_register(irn);
1036         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1037         int                        limited = arch_register_req_is(req, limited);
1038
1039         if (env->flags & CO_IFG_DUMP_LABELS) {
1040                 ir_fprintf(f, "label=\"%+F", irn);
1041
1042                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1043                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1044                         rbitset_copy_to_bitset(req->limited, bs);
1045                         ir_fprintf(f, "\\n%B", bs);
1046                 }
1047                 ir_fprintf(f, "\" ");
1048         } else {
1049                 fprintf(f, "label=\"\" shape=point " );
1050         }
1051
1052         if (env->flags & CO_IFG_DUMP_SHAPE)
1053                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1054
1055         if (env->flags & CO_IFG_DUMP_COLORS)
1056                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1057 }
1058
1059 static void ifg_dump_at_end(FILE *file, void *self)
1060 {
1061         co_ifg_dump_t *env = self;
1062         affinity_node_t *a;
1063
1064         co_gs_foreach_aff_node(env->co, a) {
1065                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1066                 unsigned aidx = get_irn_idx(a->irn);
1067                 neighb_t *n;
1068
1069                 co_gs_foreach_neighb(a, n) {
1070                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1071                         unsigned nidx = get_irn_idx(n->irn);
1072
1073                         if (aidx < nidx) {
1074                                 const char *color = nr == ar ? "blue" : "red";
1075                                 fprintf(file, "\tn%u -- n%u [weight=0.01 ", aidx, nidx);
1076                                 if (env->flags & CO_IFG_DUMP_LABELS)
1077                                         fprintf(file, "label=\"%d\" ", n->costs);
1078                                 if (env->flags & CO_IFG_DUMP_COLORS)
1079                                         fprintf(file, "color=%s ", color);
1080                                 else
1081                                         fprintf(file, "style=dotted");
1082                                 fprintf(file, "];\n");
1083                         }
1084                 }
1085         }
1086 }
1087
1088
1089 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1090         ifg_is_dump_node,
1091         ifg_dump_graph_attr,
1092         ifg_dump_node_attr,
1093         NULL,
1094         NULL,
1095         ifg_dump_at_end
1096 };
1097
1098
1099
1100 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1101 {
1102         co_ifg_dump_t cod;
1103
1104         cod.co    = co;
1105         cod.flags = flags;
1106         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1107 }
1108
1109
1110 void co_solve_park_moon(copy_opt_t *opt)
1111 {
1112         (void) opt;
1113 }
1114
1115 /*
1116     __  __       _         ____       _
1117    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1118    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1119    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1120    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1121
1122 */
1123
1124 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1125 {
1126         FILE *result;
1127         char buf[1024];
1128         size_t i, n;
1129         char *tu_name;
1130
1131         n = strlen(env->birg->main_env->cup_name);
1132         tu_name = XMALLOCN(char, n + 1);
1133         strcpy(tu_name, env->birg->main_env->cup_name);
1134         for (i = 0; i < n; ++i)
1135                 if (tu_name[i] == '.')
1136                         tu_name[i] = '_';
1137
1138
1139         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1140         xfree(tu_name);
1141         result = fopen(buf, "wt");
1142         if (result == NULL) {
1143                 panic("Couldn't open '%s' for writing.", buf);
1144         }
1145
1146         return result;
1147 }
1148
1149 void co_driver(be_chordal_env_t *cenv)
1150 {
1151         ir_timer_t          *timer = ir_timer_new();
1152         co_complete_stats_t before, after;
1153         copy_opt_t          *co;
1154         int                 was_optimal = 0;
1155
1156         assert(selected_copyopt);
1157
1158         /* skip copymin if algo is 'none' */
1159         if (selected_copyopt->copyopt == void_algo)
1160                 return;
1161
1162         be_liveness_assure_chk(be_get_irg_liveness(cenv->irg));
1163
1164         co = new_copy_opt(cenv, cost_func);
1165         co_build_ou_structure(co);
1166         co_build_graph_structure(co);
1167
1168         co_complete_stats(co, &before);
1169
1170         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1171         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1172         be_stat_ev_ull("co_max_costs",    before.max_costs);
1173         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1174         be_stat_ev_ull("co_aff_int",      before.aff_int);
1175
1176         be_stat_ev_ull("co_init_costs",   before.costs);
1177         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1178
1179         if (dump_flags & DUMP_BEFORE) {
1180                 FILE *f = my_open(cenv, "", "-before.dot");
1181                 co_dump_ifg_dot(co, f, style_flags);
1182                 fclose(f);
1183         }
1184
1185         /* if the algo can improve results, provide an initial solution with heur1 */
1186         if (improve && selected_copyopt->can_improve_existing) {
1187                 co_complete_stats_t stats;
1188
1189                 /* produce a heuristic solution */
1190                 co_solve_heuristic(co);
1191
1192                 /* do the stats and provide the current costs */
1193                 co_complete_stats(co, &stats);
1194                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1195         }
1196
1197         /* perform actual copy minimization */
1198         ir_timer_reset_and_start(timer);
1199         was_optimal = selected_copyopt->copyopt(co);
1200         ir_timer_stop(timer);
1201
1202         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1203         be_stat_ev_ull("co_optimal", was_optimal);
1204         ir_timer_free(timer);
1205
1206         if (dump_flags & DUMP_AFTER) {
1207                 FILE *f = my_open(cenv, "", "-after.dot");
1208                 co_dump_ifg_dot(co, f, style_flags);
1209                 fclose(f);
1210         }
1211
1212         co_complete_stats(co, &after);
1213
1214         if (do_stats) {
1215                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1216                 ulong64 evitable          = after.costs     - after.inevit_costs;
1217
1218                 ir_printf("%30F ", cenv->irg);
1219                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1220
1221                 if (optimizable_costs > 0)
1222                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1223                 else
1224                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1225         }
1226
1227         /* Dump the interference graph in Appel's format. */
1228         if (dump_flags & DUMP_APPEL) {
1229                 FILE *f = my_open(cenv, "", ".apl");
1230                 fprintf(f, "# %llu %llu\n", after.costs, after.unsatisfied_edges);
1231                 co_dump_appel_graph(co, f);
1232                 fclose(f);
1233         }
1234
1235         be_stat_ev_ull("co_after_costs", after.costs);
1236         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1237
1238         co_free_graph_structure(co);
1239         co_free_ou_structure(co);
1240         free_copy_opt(co);
1241 }