rename popcnt to popcount; avoid inline assembly in favor of gcc builtin functions
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode.h"
52 #include "beutil.h"
53 #include "beifg_t.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static int        improve     = 1;
81
82 static const lc_opt_enum_mask_items_t dump_items[] = {
83         { "before",  DUMP_BEFORE },
84         { "after",   DUMP_AFTER  },
85         { "appel",   DUMP_APPEL  },
86         { "all",     DUMP_ALL    },
87         { NULL,      0 }
88 };
89
90 static const lc_opt_enum_mask_items_t style_items[] = {
91         { "color",   CO_IFG_DUMP_COLORS },
92         { "labels",  CO_IFG_DUMP_LABELS },
93         { "constr",  CO_IFG_DUMP_CONSTR },
94         { "shape",   CO_IFG_DUMP_SHAPE  },
95         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
96         { NULL,      0 }
97 };
98
99 typedef int (*opt_funcptr)(void);
100
101 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
102         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
103         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
104         { "one",    (opt_funcptr) co_get_costs_all_one },
105         { NULL,     NULL }
106 };
107
108 static lc_opt_enum_mask_var_t dump_var = {
109         &dump_flags, dump_items
110 };
111
112 static lc_opt_enum_mask_var_t style_var = {
113         &style_flags, style_items
114 };
115
116 static lc_opt_enum_func_ptr_var_t cost_func_var = {
117         (opt_funcptr*) &cost_func, cost_func_items
118 };
119
120 static const lc_opt_table_entry_t options[] = {
121         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
122         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
123         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
124         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
125         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
126         LC_OPT_LAST
127 };
128
129 static be_module_list_entry_t *copyopts = NULL;
130 static const co_algo_info *selected_copyopt = NULL;
131
132 void be_register_copyopt(const char *name, co_algo_info *copyopt)
133 {
134         if (selected_copyopt == NULL)
135                 selected_copyopt = copyopt;
136         be_add_module_to_list(&copyopts, name, copyopt);
137 }
138
139 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt);
140 void be_init_copyopt(void)
141 {
142         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
143         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
144         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
145         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
146
147         lc_opt_add_table(co_grp, options);
148         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
149                                        &copyopts, (void**) &selected_copyopt);
150 }
151
152 static int void_algo(copy_opt_t *co)
153 {
154         (void) co;
155         return 0;
156 }
157
158 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone);
159 void be_init_copynone(void)
160 {
161         static co_algo_info copyheur = {
162                 void_algo, 0
163         };
164
165         be_register_copyopt("none", &copyheur);
166 }
167
168 #undef QUICK_AND_DIRTY_HACK
169
170 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
171 {
172         if (env->ifg)
173                 return be_ifg_connected(env->ifg, a, b);
174         else
175                 return be_values_interfere(env->birg->lv, a, b);
176 }
177
178
179 /******************************************************************************
180     _____                           _
181    / ____|                         | |
182   | |  __  ___ _ __   ___ _ __ __ _| |
183   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
184   | |__| |  __/ | | |  __/ | | (_| | |
185    \_____|\___|_| |_|\___|_|  \__,_|_|
186
187  ******************************************************************************/
188
189 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
190
191
192 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
193 {
194         const char *s1, *s2, *s3;
195         int len;
196         copy_opt_t *co;
197
198         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
199
200         co = XMALLOCZ(copy_opt_t);
201         co->cenv      = chordal_env;
202         co->irg       = chordal_env->irg;
203         co->cls       = chordal_env->cls;
204         co->get_costs = get_costs;
205
206         s1 = get_irp_name();
207         s2 = get_entity_name(get_irg_entity(co->irg));
208         s3 = chordal_env->cls->name;
209         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
210         co->name = XMALLOCN(char, len);
211         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
212
213         return co;
214 }
215
216 void free_copy_opt(copy_opt_t *co)
217 {
218         xfree(co->name);
219         free(co);
220 }
221
222 /**
223  * Checks if a node is optimizable, viz. has something to do with coalescing
224  * @param irn  The irn to check
225  */
226 static int co_is_optimizable_root(ir_node *irn)
227 {
228         const arch_register_req_t *req;
229         const arch_register_t     *reg;
230
231         if (arch_irn_is_ignore(irn))
232                 return 0;
233
234         reg = arch_get_irn_register(irn);
235         if (arch_register_type_is(reg, ignore))
236                 return 0;
237
238         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
239                 return 1;
240
241         req = arch_get_register_req_out(irn);
242         if (is_2addr_code(req))
243                 return 1;
244
245         return 0;
246 }
247
248 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
249 {
250         int cost = 0;
251         ir_loop *loop;
252         ir_node *root_block = get_nodes_block(root);
253         (void) co;
254         (void) arg;
255
256         if (is_Phi(root)) {
257                 /* for phis the copies are placed in the corresponding pred-block */
258                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
259         } else {
260                 /* a perm places the copy in the same block as it resides */
261                 loop = get_irn_loop(root_block);
262         }
263         if (loop) {
264                 int d = get_loop_depth(loop);
265                 cost = d*d;
266         }
267         return 1+cost;
268 }
269
270 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
271 {
272         int res;
273         ir_node *root_bl = get_nodes_block(root);
274         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
275         (void) arg;
276         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
277
278         /* don't allow values smaller than one. */
279         return res < 1 ? 1 : res;
280 }
281
282
283 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos)
284 {
285         (void) co;
286         (void) root;
287         (void) arg;
288         (void) pos;
289         return 1;
290 }
291
292 /******************************************************************************
293    ____        _   _    _       _ _          _____ _
294   / __ \      | | | |  | |     (_) |        / ____| |
295  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
296  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
297  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
298   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
299         | |                                                            __/ |
300         |_|                                                           |___/
301  ******************************************************************************/
302
303 /**
304  * Determines a maximum weighted independent set with respect to
305  * the interference and conflict edges of all nodes in a qnode.
306  */
307 static int ou_max_ind_set_costs(unit_t *ou)
308 {
309         be_chordal_env_t *chordal_env = ou->co->cenv;
310         ir_node **safe, **unsafe;
311         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
312         bitset_t *curr;
313         bitset_pos_t pos;
314         int max, curr_weight, best_weight = 0;
315
316         /* assign the nodes into two groups.
317          * safe: node has no interference, hence it is in every max stable set.
318          * unsafe: node has an interference
319          */
320         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
321         safe_costs   = 0;
322         safe_count   = 0;
323         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
324         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
325         unsafe_count = 0;
326         for (i=1; i<ou->node_count; ++i) {
327                 int is_safe = 1;
328                 for (o=1; o<ou->node_count; ++o) {
329                         if (i==o)
330                                 continue;
331                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
332                                 unsafe_costs[unsafe_count] = ou->costs[i];
333                                 unsafe[unsafe_count] = ou->nodes[i];
334                                 ++unsafe_count;
335                                 is_safe = 0;
336                                 break;
337                         }
338                 }
339                 if (is_safe) {
340                         safe_costs += ou->costs[i];
341                         safe[safe_count++] = ou->nodes[i];
342                 }
343         }
344
345
346         /* now compute the best set out of the unsafe nodes*/
347         if (unsafe_count > MIS_HEUR_TRIGGER) {
348                 bitset_t *best = bitset_alloca(unsafe_count);
349                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
350                 for (i=0; i<unsafe_count; ++i) {
351                         bitset_set(best, i);
352                         /* check if it is a stable set */
353                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
354                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
355                                         bitset_clear(best, i); /* clear the bit and try next one */
356                                         break;
357                                 }
358                 }
359                 /* compute the weight */
360                 bitset_foreach(best, pos)
361                         best_weight += unsafe_costs[pos];
362         } else {
363                 /* Exact Algorithm: Brute force */
364                 curr = bitset_alloca(unsafe_count);
365                 bitset_set_all(curr);
366                 while ((max = bitset_popcount(curr)) != 0) {
367                         /* check if curr is a stable set */
368                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
369                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
370                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
371                                                         goto no_stable_set;
372
373                         /* if we arrive here, we have a stable set */
374                         /* compute the weigth of the stable set*/
375                         curr_weight = 0;
376                         bitset_foreach(curr, pos)
377                                 curr_weight += unsafe_costs[pos];
378
379                         /* any better ? */
380                         if (curr_weight > best_weight) {
381                                 best_weight = curr_weight;
382                         }
383
384         no_stable_set:
385                         bitset_minus1(curr);
386                 }
387         }
388
389         return safe_costs+best_weight;
390 }
391
392 static void co_collect_units(ir_node *irn, void *env)
393 {
394         const arch_register_req_t *req = arch_get_register_req_out(irn);
395         copy_opt_t                *co  = env;
396         unit_t *unit;
397
398         if (req->cls != co->cls)
399                 return;
400         if (!co_is_optimizable_root(irn))
401                 return;
402
403         /* Init a new unit */
404         unit = XMALLOCZ(unit_t);
405         unit->co = co;
406         unit->node_count = 1;
407         INIT_LIST_HEAD(&unit->queue);
408
409         /* Phi with some/all of its arguments */
410         if (is_Reg_Phi(irn)) {
411                 int i, arity;
412
413                 /* init */
414                 arity = get_irn_arity(irn);
415                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
416                 unit->costs = XMALLOCN(int,      arity + 1);
417                 unit->nodes[0] = irn;
418
419                 /* fill */
420                 for (i=0; i<arity; ++i) {
421                         int o, arg_pos;
422                         ir_node *arg = get_irn_n(irn, i);
423
424                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
425                         if (arg == irn)
426                                 continue;
427                         if (nodes_interfere(co->cenv, irn, arg)) {
428                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
429                                 continue;
430                         }
431
432                         /* Else insert the argument of the phi to the members of this ou */
433                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
434
435                         if (arch_irn_is_ignore(arg))
436                                 continue;
437
438                         /* Check if arg has occurred at a prior position in the arg/list */
439                         arg_pos = 0;
440                         for (o=1; o<unit->node_count; ++o) {
441                                 if (unit->nodes[o] == arg) {
442                                         arg_pos = o;
443                                         break;
444                                 }
445                         }
446
447                         if (!arg_pos) { /* a new argument */
448                                 /* insert node, set costs */
449                                 unit->nodes[unit->node_count] = arg;
450                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
451                                 unit->node_count++;
452                         } else { /* arg has occurred before in same phi */
453                                 /* increase costs for existing arg */
454                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
455                         }
456                 }
457                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
458                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
459         } else if (is_Perm_Proj(irn)) {
460                 /* Proj of a perm with corresponding arg */
461                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
462                 unit->nodes = XMALLOCN(ir_node*, 2);
463                 unit->costs = XMALLOCN(int,      2);
464                 unit->node_count = 2;
465                 unit->nodes[0] = irn;
466                 unit->nodes[1] = get_Perm_src(irn);
467                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
468         } else {
469                 /* Src == Tgt of a 2-addr-code instruction */
470                 if (is_2addr_code(req)) {
471                         const unsigned other = req->other_same;
472                         int            count = 0;
473                         int            i;
474
475                         for (i = 0; (1U << i) <= other; ++i) {
476                                 if (other & (1U << i)) {
477                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
478                                         if (arch_irn_is_ignore(o))
479                                                 continue;
480                                         if (nodes_interfere(co->cenv, irn, o))
481                                                 continue;
482                                         ++count;
483                                 }
484                         }
485
486                         if (count != 0) {
487                                 int k = 0;
488                                 ++count;
489                                 unit->nodes = XMALLOCN(ir_node*, count);
490                                 unit->costs = XMALLOCN(int,      count);
491                                 unit->node_count = count;
492                                 unit->nodes[k++] = irn;
493
494                                 for (i = 0; 1U << i <= other; ++i) {
495                                         if (other & (1U << i)) {
496                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
497                                                 if (!arch_irn_is_ignore(o) &&
498                                                                 !nodes_interfere(co->cenv, irn, o)) {
499                                                         unit->nodes[k] = o;
500                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
501                                                         ++k;
502                                                 }
503                                         }
504                                 }
505                         }
506                 } else {
507                         assert(0 && "This is not an optimizable node!");
508                 }
509         }
510
511         /* Insert the new unit at a position according to its costs */
512         if (unit->node_count > 1) {
513                 int i;
514                 struct list_head *tmp;
515
516                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
517                 for (i=1; i<unit->node_count; ++i) {
518                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
519                         unit->all_nodes_costs += unit->costs[i];
520                 }
521
522                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
523                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
524                 /* Insert the new ou according to its sort_key */
525                 tmp = &co->units;
526                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
527                         tmp = tmp->next;
528                 list_add(&unit->units, tmp);
529         } else {
530                 free(unit);
531         }
532 }
533
534 #ifdef QUICK_AND_DIRTY_HACK
535
536 static int compare_ous(const void *k1, const void *k2)
537 {
538         const unit_t *u1 = *((const unit_t **) k1);
539         const unit_t *u2 = *((const unit_t **) k2);
540         int i, o, u1_has_constr, u2_has_constr;
541         arch_register_req_t req;
542
543         /* Units with constraints come first */
544         u1_has_constr = 0;
545         for (i=0; i<u1->node_count; ++i) {
546                 arch_get_register_req_out(&req, u1->nodes[i]);
547                 if (arch_register_req_is(&req, limited)) {
548                         u1_has_constr = 1;
549                         break;
550                 }
551         }
552
553         u2_has_constr = 0;
554         for (i=0; i<u2->node_count; ++i) {
555                 arch_get_register_req_out(&req, u2->nodes[i]);
556                 if (arch_register_req_is(&req, limited)) {
557                         u2_has_constr = 1;
558                         break;
559                 }
560         }
561
562         if (u1_has_constr != u2_has_constr)
563                 return u2_has_constr - u1_has_constr;
564
565         /* Now check, whether the two units are connected */
566 #if 0
567         for (i=0; i<u1->node_count; ++i)
568                 for (o=0; o<u2->node_count; ++o)
569                         if (u1->nodes[i] == u2->nodes[o])
570                                 return 0;
571 #endif
572
573         /* After all, the sort key decides. Greater keys come first. */
574         return u2->sort_key - u1->sort_key;
575
576 }
577
578 /**
579  * Sort the ou's according to constraints and their sort_key
580  */
581 static void co_sort_units(copy_opt_t *co)
582 {
583         int i, count = 0, costs;
584         unit_t *ou, **ous;
585
586         /* get the number of ous, remove them form the list and fill the array */
587         list_for_each_entry(unit_t, ou, &co->units, units)
588                 count++;
589         ous = ALLOCAN(unit_t, count);
590
591         costs = co_get_max_copy_costs(co);
592
593         i = 0;
594         list_for_each_entry(unit_t, ou, &co->units, units)
595                 ous[i++] = ou;
596
597         INIT_LIST_HEAD(&co->units);
598
599         assert(count == i && list_empty(&co->units));
600
601         for (i=0; i<count; ++i)
602                 ir_printf("%+F\n", ous[i]->nodes[0]);
603
604         qsort(ous, count, sizeof(*ous), compare_ous);
605
606         ir_printf("\n\n");
607         for (i=0; i<count; ++i)
608                 ir_printf("%+F\n", ous[i]->nodes[0]);
609
610         /* reinsert into list in correct order */
611         for (i=0; i<count; ++i)
612                 list_add_tail(&ous[i]->units, &co->units);
613
614         assert(costs == co_get_max_copy_costs(co));
615 }
616 #endif
617
618 void co_build_ou_structure(copy_opt_t *co)
619 {
620         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
621         INIT_LIST_HEAD(&co->units);
622         irg_walk_graph(co->irg, co_collect_units, NULL, co);
623 #ifdef QUICK_AND_DIRTY_HACK
624         co_sort_units(co);
625 #endif
626 }
627
628 void co_free_ou_structure(copy_opt_t *co)
629 {
630         unit_t *curr, *tmp;
631         ASSERT_OU_AVAIL(co);
632         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
633                 xfree(curr->nodes);
634                 xfree(curr->costs);
635                 xfree(curr);
636         }
637         co->units.next = NULL;
638 }
639
640 /* co_solve_heuristic() is implemented in becopyheur.c */
641
642 int co_get_max_copy_costs(const copy_opt_t *co)
643 {
644         int i, res = 0;
645         unit_t *curr;
646
647         ASSERT_OU_AVAIL(co);
648
649         list_for_each_entry(unit_t, curr, &co->units, units) {
650                 res += curr->inevitable_costs;
651                 for (i=1; i<curr->node_count; ++i)
652                         res += curr->costs[i];
653         }
654         return res;
655 }
656
657 int co_get_inevit_copy_costs(const copy_opt_t *co)
658 {
659         int res = 0;
660         unit_t *curr;
661
662         ASSERT_OU_AVAIL(co);
663
664         list_for_each_entry(unit_t, curr, &co->units, units)
665                 res += curr->inevitable_costs;
666         return res;
667 }
668
669 int co_get_copy_costs(const copy_opt_t *co)
670 {
671         int i, res = 0;
672         unit_t *curr;
673
674         ASSERT_OU_AVAIL(co);
675
676         list_for_each_entry(unit_t, curr, &co->units, units) {
677                 int root_col = get_irn_col(curr->nodes[0]);
678                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
679                 res += curr->inevitable_costs;
680                 for (i=1; i<curr->node_count; ++i) {
681                         int arg_col = get_irn_col(curr->nodes[i]);
682                         if (root_col != arg_col) {
683                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
684                                 res += curr->costs[i];
685                         }
686                 }
687         }
688         return res;
689 }
690
691 int co_get_lower_bound(const copy_opt_t *co)
692 {
693         int res = 0;
694         unit_t *curr;
695
696         ASSERT_OU_AVAIL(co);
697
698         list_for_each_entry(unit_t, curr, &co->units, units)
699                 res += curr->inevitable_costs + curr->min_nodes_costs;
700         return res;
701 }
702
703 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
704 {
705         bitset_t *seen = bitset_irg_malloc(co->irg);
706         affinity_node_t *an;
707
708         memset(stat, 0, sizeof(stat[0]));
709
710         /* count affinity edges. */
711         co_gs_foreach_aff_node(co, an) {
712                 neighb_t *neigh;
713                 stat->aff_nodes += 1;
714                 bitset_add_irn(seen, an->irn);
715                 co_gs_foreach_neighb(an, neigh) {
716                         if (!bitset_contains_irn(seen, neigh->irn)) {
717                                 stat->aff_edges += 1;
718                                 stat->max_costs += neigh->costs;
719
720                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
721                                         stat->costs += neigh->costs;
722                                         stat->unsatisfied_edges += 1;
723                                 }
724
725                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
726                                         stat->aff_int += 1;
727                                         stat->inevit_costs += neigh->costs;
728                                 }
729
730                         }
731                 }
732         }
733
734         bitset_free(seen);
735 }
736
737 /******************************************************************************
738    _____                 _        _____ _
739   / ____|               | |      / ____| |
740  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
741  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
742  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
743   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
744                   | |                                       __/ |
745                   |_|                                      |___/
746  ******************************************************************************/
747
748 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
749 {
750         const affinity_node_t *n1 = k1;
751         const affinity_node_t *n2 = k2;
752         (void) size;
753
754         return (n1->irn != n2->irn);
755 }
756
757 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
758 {
759         affinity_node_t new_node, *node;
760         neighb_t        *nbr;
761         int             allocnew = 1;
762
763         new_node.irn        = n1;
764         new_node.degree     = 0;
765         new_node.neighbours = NULL;
766         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
767
768         for (nbr = node->neighbours; nbr; nbr = nbr->next)
769                 if (nbr->irn == n2) {
770                         allocnew = 0;
771                         break;
772                 }
773
774         /* if we did not find n2 in n1's neighbourhood insert it */
775         if (allocnew) {
776                 nbr        = OALLOC(&co->obst, neighb_t);
777                 nbr->irn   = n2;
778                 nbr->costs = 0;
779                 nbr->next  = node->neighbours;
780
781                 node->neighbours = nbr;
782                 node->degree++;
783         }
784
785         /* now nbr points to n1's neighbour-entry of n2 */
786         nbr->costs += costs;
787 }
788
789 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
790 {
791         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
792                 add_edge(co, n1, n2, costs);
793                 add_edge(co, n2, n1, costs);
794         }
795 }
796
797 static void build_graph_walker(ir_node *irn, void *env)
798 {
799         const arch_register_req_t *req = arch_get_register_req_out(irn);
800         copy_opt_t                *co  = env;
801         int pos, max;
802         const arch_register_t *reg;
803
804         if (req->cls != co->cls || arch_irn_is_ignore(irn))
805                 return;
806
807         reg = arch_get_irn_register(irn);
808         if (arch_register_type_is(reg, ignore))
809                 return;
810
811         if (is_Reg_Phi(irn)) { /* Phis */
812                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
813                         ir_node *arg = get_irn_n(irn, pos);
814                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
815                 }
816         } else if (is_Perm_Proj(irn)) { /* Perms */
817                 ir_node *arg = get_Perm_src(irn);
818                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
819         } else { /* 2-address code */
820                 if (is_2addr_code(req)) {
821                         const unsigned other = req->other_same;
822                         int i;
823
824                         for (i = 0; 1U << i <= other; ++i) {
825                                 if (other & (1U << i)) {
826                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
827                                         if (!arch_irn_is_ignore(other))
828                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
829                                 }
830                         }
831                 }
832         }
833 }
834
835 void co_build_graph_structure(copy_opt_t *co)
836 {
837         obstack_init(&co->obst);
838         co->nodes = new_set(compare_affinity_node_t, 32);
839
840         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
841 }
842
843 void co_free_graph_structure(copy_opt_t *co)
844 {
845         ASSERT_GS_AVAIL(co);
846
847         del_set(co->nodes);
848         obstack_free(&co->obst, NULL);
849         co->nodes = NULL;
850 }
851
852 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
853
854 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
855 {
856         affinity_node_t new_node, *n;
857
858         ASSERT_GS_AVAIL(co);
859
860         new_node.irn = irn;
861         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
862         if (n) {
863                 return (n->degree > 0);
864         } else
865                 return 0;
866 }
867
868 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
869 {
870         ir_node *nodes[]  = { a, b };
871         bitset_t *constr[] = { NULL, NULL };
872         int j;
873
874         constr[0] = bitset_alloca(co->cls->n_regs);
875         constr[1] = bitset_alloca(co->cls->n_regs);
876
877         for (j = 0; j < 2; ++j) {
878                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
879                 if (arch_register_req_is(req, limited))
880                         rbitset_copy_to_bitset(req->limited, constr[j]);
881                 else
882                         bitset_set_all(constr[j]);
883
884         }
885
886         return !bitset_intersect(constr[0], constr[1]);
887 }
888
889 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
890 {
891         be_ifg_t *ifg       = co->cenv->ifg;
892         int      *color_map = ALLOCAN(int, co->cls->n_regs);
893         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
894
895         ir_node *irn;
896         void *it, *nit;
897         int n, n_regs;
898         unsigned i;
899
900         n_regs = 0;
901         for (i = 0; i < co->cls->n_regs; ++i) {
902                 const arch_register_t *reg = &co->cls->regs[i];
903                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
904         }
905
906         /*
907          * n contains the first node number.
908          * the values below n are the pre-colored register nodes
909          */
910
911         it  = be_ifg_nodes_iter_alloca(ifg);
912         nit = be_ifg_neighbours_iter_alloca(ifg);
913
914         n = n_regs;
915         be_ifg_foreach_node(ifg, it, irn) {
916                 if (arch_irn_is_ignore(irn))
917                         continue;
918                 node_map[get_irn_idx(irn)] = n++;
919         }
920
921         fprintf(f, "%d %d\n", n, n_regs);
922
923         be_ifg_foreach_node(ifg, it, irn) {
924                 if (!arch_irn_is_ignore(irn)) {
925                         int idx                        = node_map[get_irn_idx(irn)];
926                         affinity_node_t           *a   = get_affinity_info(co, irn);
927                         const arch_register_req_t *req = arch_get_register_req_out(irn);
928                         ir_node                   *adj;
929
930                         if (arch_register_req_is(req, limited)) {
931                                 for (i = 0; i < co->cls->n_regs; ++i) {
932                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
933                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
934                                 }
935                         }
936
937                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
938                                 if (!arch_irn_is_ignore(adj) &&
939                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
940                                         int adj_idx = node_map[get_irn_idx(adj)];
941                                         if (idx < adj_idx)
942                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
943                                 }
944                         }
945
946                         if (a) {
947                                 neighb_t *n;
948
949                                 co_gs_foreach_neighb(a, n) {
950                                         if (!arch_irn_is_ignore(n->irn)) {
951                                                 int n_idx = node_map[get_irn_idx(n->irn)];
952                                                 if (idx < n_idx)
953                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
954                                         }
955                                 }
956                         }
957                 }
958         }
959
960         xfree(node_map);
961 }
962
963 /*
964          ___ _____ ____   ____   ___ _____   ____                        _
965         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
966          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
967          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
968         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
969                                                                   |_|            |___/
970 */
971
972 static const char *get_dot_color_name(size_t col)
973 {
974         static const char *names[] = {
975                 "blue",
976                 "red",
977                 "green",
978                 "yellow",
979                 "cyan",
980                 "magenta",
981                 "orange",
982                 "chocolate",
983                 "beige",
984                 "navy",
985                 "darkgreen",
986                 "darkred",
987                 "lightPink",
988                 "chartreuse",
989                 "lightskyblue",
990                 "linen",
991                 "pink",
992                 "lightslateblue",
993                 "mintcream",
994                 "red",
995                 "darkolivegreen",
996                 "mediumblue",
997                 "mistyrose",
998                 "salmon",
999                 "darkseagreen",
1000                 "mediumslateblue"
1001                 "moccasin",
1002                 "tomato",
1003                 "forestgreen",
1004                 "darkturquoise",
1005                 "palevioletred"
1006         };
1007
1008         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1009 }
1010
1011 typedef struct _co_ifg_dump_t {
1012         const copy_opt_t *co;
1013         unsigned flags;
1014 } co_ifg_dump_t;
1015
1016 static void ifg_dump_graph_attr(FILE *f, void *self)
1017 {
1018         (void) self;
1019         fprintf(f, "overlap=scale");
1020 }
1021
1022 static int ifg_is_dump_node(void *self, ir_node *irn)
1023 {
1024         (void)self;
1025         return !arch_irn_is_ignore(irn);
1026 }
1027
1028 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1029 {
1030         co_ifg_dump_t             *env     = self;
1031         const arch_register_t     *reg     = arch_get_irn_register(irn);
1032         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1033         int                        limited = arch_register_req_is(req, limited);
1034
1035         if (env->flags & CO_IFG_DUMP_LABELS) {
1036                 ir_fprintf(f, "label=\"%+F", irn);
1037
1038                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1039                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1040                         rbitset_copy_to_bitset(req->limited, bs);
1041                         ir_fprintf(f, "\\n%B", bs);
1042                 }
1043                 ir_fprintf(f, "\" ");
1044         } else {
1045                 fprintf(f, "label=\"\" shape=point " );
1046         }
1047
1048         if (env->flags & CO_IFG_DUMP_SHAPE)
1049                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1050
1051         if (env->flags & CO_IFG_DUMP_COLORS)
1052                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1053 }
1054
1055 static void ifg_dump_at_end(FILE *file, void *self)
1056 {
1057         co_ifg_dump_t *env = self;
1058         affinity_node_t *a;
1059
1060         co_gs_foreach_aff_node(env->co, a) {
1061                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1062                 unsigned aidx = get_irn_idx(a->irn);
1063                 neighb_t *n;
1064
1065                 co_gs_foreach_neighb(a, n) {
1066                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1067                         unsigned nidx = get_irn_idx(n->irn);
1068
1069                         if (aidx < nidx) {
1070                                 const char *color = nr == ar ? "blue" : "red";
1071                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1072                                 if (env->flags & CO_IFG_DUMP_LABELS)
1073                                         fprintf(file, "label=\"%d\" ", n->costs);
1074                                 if (env->flags & CO_IFG_DUMP_COLORS)
1075                                         fprintf(file, "color=%s ", color);
1076                                 else
1077                                         fprintf(file, "style=dotted");
1078                                 fprintf(file, "];\n");
1079                         }
1080                 }
1081         }
1082 }
1083
1084
1085 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1086         ifg_is_dump_node,
1087         ifg_dump_graph_attr,
1088         ifg_dump_node_attr,
1089         NULL,
1090         NULL,
1091         ifg_dump_at_end
1092 };
1093
1094
1095
1096 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1097 {
1098         co_ifg_dump_t cod;
1099
1100         cod.co    = co;
1101         cod.flags = flags;
1102         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1103 }
1104
1105
1106 void co_solve_park_moon(copy_opt_t *opt)
1107 {
1108         (void) opt;
1109 }
1110
1111 /*
1112     __  __       _         ____       _
1113    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1114    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1115    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1116    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1117
1118 */
1119
1120 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1121 {
1122         FILE *result;
1123         char buf[1024];
1124         size_t i, n;
1125         char *tu_name;
1126
1127         n = strlen(env->birg->main_env->cup_name);
1128         tu_name = XMALLOCN(char, n + 1);
1129         strcpy(tu_name, env->birg->main_env->cup_name);
1130         for (i = 0; i < n; ++i)
1131                 if (tu_name[i] == '.')
1132                         tu_name[i] = '_';
1133
1134
1135         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1136         xfree(tu_name);
1137         result = fopen(buf, "wt");
1138         if (result == NULL) {
1139                 panic("Couldn't open '%s' for writing.", buf);
1140         }
1141
1142         return result;
1143 }
1144
1145 void co_driver(be_chordal_env_t *cenv)
1146 {
1147         ir_timer_t          *timer = ir_timer_new();
1148         co_complete_stats_t before, after;
1149         copy_opt_t          *co;
1150         int                 was_optimal = 0;
1151
1152         assert(selected_copyopt);
1153
1154         /* skip copymin if algo is 'none' */
1155         if (selected_copyopt->copyopt == void_algo)
1156                 return;
1157
1158         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1159
1160         co = new_copy_opt(cenv, cost_func);
1161         co_build_ou_structure(co);
1162         co_build_graph_structure(co);
1163
1164         co_complete_stats(co, &before);
1165
1166         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1167         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1168         be_stat_ev_ull("co_max_costs",    before.max_costs);
1169         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1170         be_stat_ev_ull("co_aff_int",      before.aff_int);
1171
1172         be_stat_ev_ull("co_init_costs",   before.costs);
1173         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1174
1175         if (dump_flags & DUMP_BEFORE) {
1176                 FILE *f = my_open(cenv, "", "-before.dot");
1177                 co_dump_ifg_dot(co, f, style_flags);
1178                 fclose(f);
1179         }
1180
1181         /* if the algo can improve results, provide an initial solution with heur1 */
1182         if (improve && selected_copyopt->can_improve_existing) {
1183                 co_complete_stats_t stats;
1184
1185                 /* produce a heuristic solution */
1186                 co_solve_heuristic(co);
1187
1188                 /* do the stats and provide the current costs */
1189                 co_complete_stats(co, &stats);
1190                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1191         }
1192
1193         /* perform actual copy minimization */
1194         ir_timer_reset_and_start(timer);
1195         was_optimal = selected_copyopt->copyopt(co);
1196         ir_timer_stop(timer);
1197
1198         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1199         be_stat_ev_ull("co_optimal", was_optimal);
1200         ir_timer_free(timer);
1201
1202         if (dump_flags & DUMP_AFTER) {
1203                 FILE *f = my_open(cenv, "", "-after.dot");
1204                 co_dump_ifg_dot(co, f, style_flags);
1205                 fclose(f);
1206         }
1207
1208         co_complete_stats(co, &after);
1209
1210         if (do_stats) {
1211                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1212                 ulong64 evitable          = after.costs     - after.inevit_costs;
1213
1214                 ir_printf("%30F ", cenv->irg);
1215                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1216
1217                 if (optimizable_costs > 0)
1218                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1219                 else
1220                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1221         }
1222
1223         /* Dump the interference graph in Appel's format. */
1224         if (dump_flags & DUMP_APPEL) {
1225                 FILE *f = my_open(cenv, "", ".apl");
1226                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1227                 co_dump_appel_graph(co, f);
1228                 fclose(f);
1229         }
1230
1231         be_stat_ev_ull("co_after_costs", after.costs);
1232         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1233
1234         co_free_graph_structure(co);
1235         co_free_ou_structure(co);
1236         free_copy_opt(co);
1237 }