Use module mechanism to register copy minimization algorithms.
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode_t.h"
52 #include "beutil.h"
53 #include "beifg_t.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static int        improve     = 1;
81
82 static const lc_opt_enum_mask_items_t dump_items[] = {
83         { "before",  DUMP_BEFORE },
84         { "after",   DUMP_AFTER  },
85         { "appel",   DUMP_APPEL  },
86         { "all",     DUMP_ALL    },
87         { NULL,      0 }
88 };
89
90 static const lc_opt_enum_mask_items_t style_items[] = {
91         { "color",   CO_IFG_DUMP_COLORS },
92         { "labels",  CO_IFG_DUMP_LABELS },
93         { "constr",  CO_IFG_DUMP_CONSTR },
94         { "shape",   CO_IFG_DUMP_SHAPE  },
95         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
96         { NULL,      0 }
97 };
98
99 typedef int (*opt_funcptr)(void);
100
101 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
102         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
103         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
104         { "one",    (opt_funcptr) co_get_costs_all_one },
105         { NULL,     NULL }
106 };
107
108 static lc_opt_enum_mask_var_t dump_var = {
109         &dump_flags, dump_items
110 };
111
112 static lc_opt_enum_mask_var_t style_var = {
113         &style_flags, style_items
114 };
115
116 static lc_opt_enum_func_ptr_var_t cost_func_var = {
117         (opt_funcptr*) &cost_func, cost_func_items
118 };
119
120 static const lc_opt_table_entry_t options[] = {
121         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
122         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
123         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
124         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
125         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
126         LC_OPT_LAST
127 };
128
129 static be_module_list_entry_t *copyopts = NULL;
130 static const co_algo_info *selected_copyopt = NULL;
131
132 void be_register_copyopt(const char *name, co_algo_info *copyopt)
133 {
134         if (selected_copyopt == NULL)
135                 selected_copyopt = copyopt;
136         be_add_module_to_list(&copyopts, name, copyopt);
137 }
138
139 void be_init_copyopt(void)
140 {
141         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
142         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
143         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
144         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
145
146         lc_opt_add_table(co_grp, options);
147         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
148                                        &copyopts, (void**) &selected_copyopt);
149 }
150
151 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt);
152
153 static int void_algo(copy_opt_t *co)
154 {
155         (void) co;
156         return 0;
157 }
158
159 void be_init_copynone(void)
160 {
161         static co_algo_info copyheur = {
162                 void_algo, 0
163         };
164
165         be_register_copyopt("none", &copyheur);
166 }
167
168 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone);
169
170 #undef QUICK_AND_DIRTY_HACK
171
172 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
173 {
174         if (env->ifg)
175                 return be_ifg_connected(env->ifg, a, b);
176         else
177                 return be_values_interfere(env->birg->lv, a, b);
178 }
179
180
181 /******************************************************************************
182     _____                           _
183    / ____|                         | |
184   | |  __  ___ _ __   ___ _ __ __ _| |
185   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
186   | |__| |  __/ | | |  __/ | | (_| | |
187    \_____|\___|_| |_|\___|_|  \__,_|_|
188
189  ******************************************************************************/
190
191 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
192
193
194 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
195 {
196         const char *s1, *s2, *s3;
197         int len;
198         copy_opt_t *co;
199
200         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
201
202         co = XMALLOCZ(copy_opt_t);
203         co->cenv      = chordal_env;
204         co->irg       = chordal_env->irg;
205         co->cls       = chordal_env->cls;
206         co->get_costs = get_costs;
207
208         s1 = get_irp_name();
209         s2 = get_entity_name(get_irg_entity(co->irg));
210         s3 = chordal_env->cls->name;
211         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
212         co->name = XMALLOCN(char, len);
213         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
214
215         return co;
216 }
217
218 void free_copy_opt(copy_opt_t *co) {
219         xfree(co->name);
220         free(co);
221 }
222
223 /**
224  * Checks if a node is optimizable, viz. has something to do with coalescing
225  * @param irn  The irn to check
226  */
227 static int co_is_optimizable_root(ir_node *irn)
228 {
229         const arch_register_req_t *req;
230         const arch_register_t     *reg;
231
232         if (arch_irn_is_ignore(irn))
233                 return 0;
234
235         reg = arch_get_irn_register(irn);
236         if (arch_register_type_is(reg, ignore))
237                 return 0;
238
239         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
240                 return 1;
241
242         req = arch_get_register_req_out(irn);
243         if (is_2addr_code(req))
244                 return 1;
245
246         return 0;
247 }
248
249 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
250         int cost = 0;
251         ir_loop *loop;
252         ir_node *root_block = get_nodes_block(root);
253         (void) co;
254         (void) arg;
255
256         if (is_Phi(root)) {
257                 /* for phis the copies are placed in the corresponding pred-block */
258                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
259         } else {
260                 /* a perm places the copy in the same block as it resides */
261                 loop = get_irn_loop(root_block);
262         }
263         if (loop) {
264                 int d = get_loop_depth(loop);
265                 cost = d*d;
266         }
267         return 1+cost;
268 }
269
270 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
271         int res;
272         ir_node *root_bl = get_nodes_block(root);
273         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
274         (void) arg;
275         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
276
277         /* don't allow values smaller than one. */
278         return res < 1 ? 1 : res;
279 }
280
281
282 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
283         (void) co;
284         (void) root;
285         (void) arg;
286         (void) pos;
287         return 1;
288 }
289
290 /******************************************************************************
291    ____        _   _    _       _ _          _____ _
292   / __ \      | | | |  | |     (_) |        / ____| |
293  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
294  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
295  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
296   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
297         | |                                                            __/ |
298         |_|                                                           |___/
299  ******************************************************************************/
300
301 /**
302  * Determines a maximum weighted independent set with respect to
303  * the interference and conflict edges of all nodes in a qnode.
304  */
305 static int ou_max_ind_set_costs(unit_t *ou) {
306         be_chordal_env_t *chordal_env = ou->co->cenv;
307         ir_node **safe, **unsafe;
308         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
309         bitset_t *curr;
310         bitset_pos_t pos;
311         int max, curr_weight, best_weight = 0;
312
313         /* assign the nodes into two groups.
314          * safe: node has no interference, hence it is in every max stable set.
315          * unsafe: node has an interference
316          */
317         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
318         safe_costs   = 0;
319         safe_count   = 0;
320         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
321         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
322         unsafe_count = 0;
323         for(i=1; i<ou->node_count; ++i) {
324                 int is_safe = 1;
325                 for(o=1; o<ou->node_count; ++o) {
326                         if (i==o)
327                                 continue;
328                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
329                                 unsafe_costs[unsafe_count] = ou->costs[i];
330                                 unsafe[unsafe_count] = ou->nodes[i];
331                                 ++unsafe_count;
332                                 is_safe = 0;
333                                 break;
334                         }
335                 }
336                 if (is_safe) {
337                         safe_costs += ou->costs[i];
338                         safe[safe_count++] = ou->nodes[i];
339                 }
340         }
341
342
343         /* now compute the best set out of the unsafe nodes*/
344         if (unsafe_count > MIS_HEUR_TRIGGER) {
345                 bitset_t *best = bitset_alloca(unsafe_count);
346                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
347                 for (i=0; i<unsafe_count; ++i) {
348                         bitset_set(best, i);
349                         /* check if it is a stable set */
350                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
351                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
352                                         bitset_clear(best, i); /* clear the bit and try next one */
353                                         break;
354                                 }
355                 }
356                 /* compute the weight */
357                 bitset_foreach(best, pos)
358                         best_weight += unsafe_costs[pos];
359         } else {
360                 /* Exact Algorithm: Brute force */
361                 curr = bitset_alloca(unsafe_count);
362                 bitset_set_all(curr);
363                 while ((max = bitset_popcnt(curr)) != 0) {
364                         /* check if curr is a stable set */
365                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
366                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
367                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
368                                                         goto no_stable_set;
369
370                         /* if we arrive here, we have a stable set */
371                         /* compute the weigth of the stable set*/
372                         curr_weight = 0;
373                         bitset_foreach(curr, pos)
374                                 curr_weight += unsafe_costs[pos];
375
376                         /* any better ? */
377                         if (curr_weight > best_weight) {
378                                 best_weight = curr_weight;
379                         }
380
381         no_stable_set:
382                         bitset_minus1(curr);
383                 }
384         }
385
386         return safe_costs+best_weight;
387 }
388
389 static void co_collect_units(ir_node *irn, void *env)
390 {
391         const arch_register_req_t *req = arch_get_register_req_out(irn);
392         copy_opt_t                *co  = env;
393         unit_t *unit;
394
395         if (req->cls != co->cls)
396                 return;
397         if (!co_is_optimizable_root(irn))
398                 return;
399
400         /* Init a new unit */
401         unit = XMALLOCZ(unit_t);
402         unit->co = co;
403         unit->node_count = 1;
404         INIT_LIST_HEAD(&unit->queue);
405
406         /* Phi with some/all of its arguments */
407         if (is_Reg_Phi(irn)) {
408                 int i, arity;
409
410                 /* init */
411                 arity = get_irn_arity(irn);
412                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
413                 unit->costs = XMALLOCN(int,      arity + 1);
414                 unit->nodes[0] = irn;
415
416                 /* fill */
417                 for (i=0; i<arity; ++i) {
418                         int o, arg_pos;
419                         ir_node *arg = get_irn_n(irn, i);
420
421                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
422                         if (arg == irn)
423                                 continue;
424                         if (nodes_interfere(co->cenv, irn, arg)) {
425                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
426                                 continue;
427                         }
428
429                         /* Else insert the argument of the phi to the members of this ou */
430                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
431
432                         if (arch_irn_is_ignore(arg))
433                                 continue;
434
435                         /* Check if arg has occurred at a prior position in the arg/list */
436                         arg_pos = 0;
437                         for (o=1; o<unit->node_count; ++o) {
438                                 if (unit->nodes[o] == arg) {
439                                         arg_pos = o;
440                                         break;
441                                 }
442                         }
443
444                         if (!arg_pos) { /* a new argument */
445                                 /* insert node, set costs */
446                                 unit->nodes[unit->node_count] = arg;
447                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
448                                 unit->node_count++;
449                         } else { /* arg has occurred before in same phi */
450                                 /* increase costs for existing arg */
451                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
452                         }
453                 }
454                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
455                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
456         } else if (is_Perm_Proj(irn)) {
457                 /* Proj of a perm with corresponding arg */
458                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
459                 unit->nodes = XMALLOCN(ir_node*, 2);
460                 unit->costs = XMALLOCN(int,      2);
461                 unit->node_count = 2;
462                 unit->nodes[0] = irn;
463                 unit->nodes[1] = get_Perm_src(irn);
464                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
465         } else {
466                 /* Src == Tgt of a 2-addr-code instruction */
467                 if (is_2addr_code(req)) {
468                         const unsigned other = req->other_same;
469                         int            count = 0;
470                         int            i;
471
472                         for (i = 0; (1U << i) <= other; ++i) {
473                                 if (other & (1U << i)) {
474                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
475                                         if (arch_irn_is_ignore(o))
476                                                 continue;
477                                         if (nodes_interfere(co->cenv, irn, o))
478                                                 continue;
479                                         ++count;
480                                 }
481                         }
482
483                         if (count != 0) {
484                                 int k = 0;
485                                 ++count;
486                                 unit->nodes = XMALLOCN(ir_node*, count);
487                                 unit->costs = XMALLOCN(int,      count);
488                                 unit->node_count = count;
489                                 unit->nodes[k++] = irn;
490
491                                 for (i = 0; 1U << i <= other; ++i) {
492                                         if (other & (1U << i)) {
493                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
494                                                 if (!arch_irn_is_ignore(o) &&
495                                                                 !nodes_interfere(co->cenv, irn, o)) {
496                                                         unit->nodes[k] = o;
497                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
498                                                         ++k;
499                                                 }
500                                         }
501                                 }
502                         }
503                 } else {
504                         assert(0 && "This is not an optimizable node!");
505                 }
506         }
507
508         /* Insert the new unit at a position according to its costs */
509         if (unit->node_count > 1) {
510                 int i;
511                 struct list_head *tmp;
512
513                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
514                 for(i=1; i<unit->node_count; ++i) {
515                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
516                         unit->all_nodes_costs += unit->costs[i];
517                 }
518
519                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
520                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
521                 /* Insert the new ou according to its sort_key */
522                 tmp = &co->units;
523                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
524                         tmp = tmp->next;
525                 list_add(&unit->units, tmp);
526         } else {
527                 free(unit);
528         }
529 }
530
531 #ifdef QUICK_AND_DIRTY_HACK
532
533 static int compare_ous(const void *k1, const void *k2) {
534         const unit_t *u1 = *((const unit_t **) k1);
535         const unit_t *u2 = *((const unit_t **) k2);
536         int i, o, u1_has_constr, u2_has_constr;
537         arch_register_req_t req;
538
539         /* Units with constraints come first */
540         u1_has_constr = 0;
541         for (i=0; i<u1->node_count; ++i) {
542                 arch_get_register_req_out(&req, u1->nodes[i]);
543                 if (arch_register_req_is(&req, limited)) {
544                         u1_has_constr = 1;
545                         break;
546                 }
547         }
548
549         u2_has_constr = 0;
550         for (i=0; i<u2->node_count; ++i) {
551                 arch_get_register_req_out(&req, u2->nodes[i]);
552                 if (arch_register_req_is(&req, limited)) {
553                         u2_has_constr = 1;
554                         break;
555                 }
556         }
557
558         if (u1_has_constr != u2_has_constr)
559                 return u2_has_constr - u1_has_constr;
560
561         /* Now check, whether the two units are connected */
562 #if 0
563         for (i=0; i<u1->node_count; ++i)
564                 for (o=0; o<u2->node_count; ++o)
565                         if (u1->nodes[i] == u2->nodes[o])
566                                 return 0;
567 #endif
568
569         /* After all, the sort key decides. Greater keys come first. */
570         return u2->sort_key - u1->sort_key;
571
572 }
573
574 /**
575  * Sort the ou's according to constraints and their sort_key
576  */
577 static void co_sort_units(copy_opt_t *co) {
578         int i, count = 0, costs;
579         unit_t *ou, **ous;
580
581         /* get the number of ous, remove them form the list and fill the array */
582         list_for_each_entry(unit_t, ou, &co->units, units)
583                 count++;
584         ous = ALLOCAN(unit_t, count);
585
586         costs = co_get_max_copy_costs(co);
587
588         i = 0;
589         list_for_each_entry(unit_t, ou, &co->units, units)
590                 ous[i++] = ou;
591
592         INIT_LIST_HEAD(&co->units);
593
594         assert(count == i && list_empty(&co->units));
595
596         for (i=0; i<count; ++i)
597                 ir_printf("%+F\n", ous[i]->nodes[0]);
598
599         qsort(ous, count, sizeof(*ous), compare_ous);
600
601         ir_printf("\n\n");
602         for (i=0; i<count; ++i)
603                 ir_printf("%+F\n", ous[i]->nodes[0]);
604
605         /* reinsert into list in correct order */
606         for (i=0; i<count; ++i)
607                 list_add_tail(&ous[i]->units, &co->units);
608
609         assert(costs == co_get_max_copy_costs(co));
610 }
611 #endif
612
613 void co_build_ou_structure(copy_opt_t *co) {
614         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
615         INIT_LIST_HEAD(&co->units);
616         irg_walk_graph(co->irg, co_collect_units, NULL, co);
617 #ifdef QUICK_AND_DIRTY_HACK
618         co_sort_units(co);
619 #endif
620 }
621
622 void co_free_ou_structure(copy_opt_t *co) {
623         unit_t *curr, *tmp;
624         ASSERT_OU_AVAIL(co);
625         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
626                 xfree(curr->nodes);
627                 xfree(curr->costs);
628                 xfree(curr);
629         }
630         co->units.next = NULL;
631 }
632
633 /* co_solve_heuristic() is implemented in becopyheur.c */
634
635 int co_get_max_copy_costs(const copy_opt_t *co) {
636         int i, res = 0;
637         unit_t *curr;
638
639         ASSERT_OU_AVAIL(co);
640
641         list_for_each_entry(unit_t, curr, &co->units, units) {
642                 res += curr->inevitable_costs;
643                 for (i=1; i<curr->node_count; ++i)
644                         res += curr->costs[i];
645         }
646         return res;
647 }
648
649 int co_get_inevit_copy_costs(const copy_opt_t *co) {
650         int res = 0;
651         unit_t *curr;
652
653         ASSERT_OU_AVAIL(co);
654
655         list_for_each_entry(unit_t, curr, &co->units, units)
656                 res += curr->inevitable_costs;
657         return res;
658 }
659
660 int co_get_copy_costs(const copy_opt_t *co) {
661         int i, res = 0;
662         unit_t *curr;
663
664         ASSERT_OU_AVAIL(co);
665
666         list_for_each_entry(unit_t, curr, &co->units, units) {
667                 int root_col = get_irn_col(curr->nodes[0]);
668                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
669                 res += curr->inevitable_costs;
670                 for (i=1; i<curr->node_count; ++i) {
671                         int arg_col = get_irn_col(curr->nodes[i]);
672                         if (root_col != arg_col) {
673                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
674                                 res += curr->costs[i];
675                         }
676                 }
677         }
678         return res;
679 }
680
681 int co_get_lower_bound(const copy_opt_t *co) {
682         int res = 0;
683         unit_t *curr;
684
685         ASSERT_OU_AVAIL(co);
686
687         list_for_each_entry(unit_t, curr, &co->units, units)
688                 res += curr->inevitable_costs + curr->min_nodes_costs;
689         return res;
690 }
691
692 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
693 {
694         bitset_t *seen = bitset_irg_malloc(co->irg);
695         affinity_node_t *an;
696
697         memset(stat, 0, sizeof(stat[0]));
698
699         /* count affinity edges. */
700         co_gs_foreach_aff_node(co, an) {
701                 neighb_t *neigh;
702                 stat->aff_nodes += 1;
703                 bitset_add_irn(seen, an->irn);
704                 co_gs_foreach_neighb(an, neigh) {
705                         if(!bitset_contains_irn(seen, neigh->irn)) {
706                                 stat->aff_edges += 1;
707                                 stat->max_costs += neigh->costs;
708
709                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
710                                         stat->costs += neigh->costs;
711                                         stat->unsatisfied_edges += 1;
712                                 }
713
714                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
715                                         stat->aff_int += 1;
716                                         stat->inevit_costs += neigh->costs;
717                                 }
718
719                         }
720                 }
721         }
722
723         bitset_free(seen);
724 }
725
726 /******************************************************************************
727    _____                 _        _____ _
728   / ____|               | |      / ____| |
729  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
730  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
731  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
732   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
733                   | |                                       __/ |
734                   |_|                                      |___/
735  ******************************************************************************/
736
737 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
738         const affinity_node_t *n1 = k1;
739         const affinity_node_t *n2 = k2;
740         (void) size;
741
742         return (n1->irn != n2->irn);
743 }
744
745 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
746         affinity_node_t new_node, *node;
747         neighb_t        *nbr;
748         int             allocnew = 1;
749
750         new_node.irn        = n1;
751         new_node.degree     = 0;
752         new_node.neighbours = NULL;
753         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
754
755         for (nbr = node->neighbours; nbr; nbr = nbr->next)
756                 if (nbr->irn == n2) {
757                         allocnew = 0;
758                         break;
759                 }
760
761         /* if we did not find n2 in n1's neighbourhood insert it */
762         if (allocnew) {
763                 nbr        = OALLOC(&co->obst, neighb_t);
764                 nbr->irn   = n2;
765                 nbr->costs = 0;
766                 nbr->next  = node->neighbours;
767
768                 node->neighbours = nbr;
769                 node->degree++;
770         }
771
772         /* now nbr points to n1's neighbour-entry of n2 */
773         nbr->costs += costs;
774 }
775
776 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
777         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
778                 add_edge(co, n1, n2, costs);
779                 add_edge(co, n2, n1, costs);
780         }
781 }
782
783 static void build_graph_walker(ir_node *irn, void *env) {
784         const arch_register_req_t *req = arch_get_register_req_out(irn);
785         copy_opt_t                *co  = env;
786         int pos, max;
787         const arch_register_t *reg;
788
789         if (req->cls != co->cls || arch_irn_is_ignore(irn))
790                 return;
791
792         reg = arch_get_irn_register(irn);
793         if (arch_register_type_is(reg, ignore))
794                 return;
795
796         if (is_Reg_Phi(irn)) { /* Phis */
797                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
798                         ir_node *arg = get_irn_n(irn, pos);
799                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
800                 }
801         } else if (is_Perm_Proj(irn)) { /* Perms */
802                 ir_node *arg = get_Perm_src(irn);
803                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
804         } else { /* 2-address code */
805                 if (is_2addr_code(req)) {
806                         const unsigned other = req->other_same;
807                         int i;
808
809                         for (i = 0; 1U << i <= other; ++i) {
810                                 if (other & (1U << i)) {
811                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
812                                         if (!arch_irn_is_ignore(other))
813                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
814                                 }
815                         }
816                 }
817         }
818 }
819
820 void co_build_graph_structure(copy_opt_t *co) {
821         obstack_init(&co->obst);
822         co->nodes = new_set(compare_affinity_node_t, 32);
823
824         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
825 }
826
827 void co_free_graph_structure(copy_opt_t *co) {
828         ASSERT_GS_AVAIL(co);
829
830         del_set(co->nodes);
831         obstack_free(&co->obst, NULL);
832         co->nodes = NULL;
833 }
834
835 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
836
837 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
838         affinity_node_t new_node, *n;
839
840         ASSERT_GS_AVAIL(co);
841
842         new_node.irn = irn;
843         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
844         if (n) {
845                 return (n->degree > 0);
846         } else
847                 return 0;
848 }
849
850 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
851 {
852         ir_node *nodes[]  = { a, b };
853         bitset_t *constr[] = { NULL, NULL };
854         int j;
855
856         constr[0] = bitset_alloca(co->cls->n_regs);
857         constr[1] = bitset_alloca(co->cls->n_regs);
858
859         for (j = 0; j < 2; ++j) {
860                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
861                 if(arch_register_req_is(req, limited))
862                         rbitset_copy_to_bitset(req->limited, constr[j]);
863                 else
864                         bitset_set_all(constr[j]);
865
866         }
867
868         return !bitset_intersect(constr[0], constr[1]);
869 }
870
871 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
872 {
873         be_ifg_t *ifg       = co->cenv->ifg;
874         int      *color_map = ALLOCAN(int, co->cls->n_regs);
875         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
876
877         ir_node *irn;
878         void *it, *nit;
879         int n, n_regs;
880         unsigned i;
881
882         n_regs = 0;
883         for(i = 0; i < co->cls->n_regs; ++i) {
884                 const arch_register_t *reg = &co->cls->regs[i];
885                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
886         }
887
888         /*
889          * n contains the first node number.
890          * the values below n are the pre-colored register nodes
891          */
892
893         it  = be_ifg_nodes_iter_alloca(ifg);
894         nit = be_ifg_neighbours_iter_alloca(ifg);
895
896         n = n_regs;
897         be_ifg_foreach_node(ifg, it, irn) {
898                 if (arch_irn_is_ignore(irn))
899                         continue;
900                 node_map[get_irn_idx(irn)] = n++;
901         }
902
903         fprintf(f, "%d %d\n", n, n_regs);
904
905         be_ifg_foreach_node(ifg, it, irn) {
906                 if (!arch_irn_is_ignore(irn)) {
907                         int idx                        = node_map[get_irn_idx(irn)];
908                         affinity_node_t           *a   = get_affinity_info(co, irn);
909                         const arch_register_req_t *req = arch_get_register_req_out(irn);
910                         ir_node                   *adj;
911
912                         if(arch_register_req_is(req, limited)) {
913                                 for(i = 0; i < co->cls->n_regs; ++i) {
914                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
915                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
916                                 }
917                         }
918
919                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
920                                 if (!arch_irn_is_ignore(adj) &&
921                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
922                                         int adj_idx = node_map[get_irn_idx(adj)];
923                                         if(idx < adj_idx)
924                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
925                                 }
926                         }
927
928                         if(a) {
929                                 neighb_t *n;
930
931                                 co_gs_foreach_neighb(a, n) {
932                                         if (!arch_irn_is_ignore(n->irn)) {
933                                                 int n_idx = node_map[get_irn_idx(n->irn)];
934                                                 if(idx < n_idx)
935                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
936                                         }
937                                 }
938                         }
939                 }
940         }
941
942         xfree(node_map);
943 }
944
945 /*
946          ___ _____ ____   ____   ___ _____   ____                        _
947         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
948          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
949          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
950         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
951                                                                   |_|            |___/
952 */
953
954 static const char *get_dot_color_name(size_t col)
955 {
956         static const char *names[] = {
957                 "blue",
958                 "red",
959                 "green",
960                 "yellow",
961                 "cyan",
962                 "magenta",
963                 "orange",
964                 "chocolate",
965                 "beige",
966                 "navy",
967                 "darkgreen",
968                 "darkred",
969                 "lightPink",
970                 "chartreuse",
971                 "lightskyblue",
972                 "linen",
973                 "pink",
974                 "lightslateblue",
975                 "mintcream",
976                 "red",
977                 "darkolivegreen",
978                 "mediumblue",
979                 "mistyrose",
980                 "salmon",
981                 "darkseagreen",
982                 "mediumslateblue"
983                 "moccasin",
984                 "tomato",
985                 "forestgreen",
986                 "darkturquoise",
987                 "palevioletred"
988         };
989
990         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
991 }
992
993 typedef struct _co_ifg_dump_t {
994         const copy_opt_t *co;
995         unsigned flags;
996 } co_ifg_dump_t;
997
998 static void ifg_dump_graph_attr(FILE *f, void *self)
999 {
1000         (void) self;
1001         fprintf(f, "overlap=scale");
1002 }
1003
1004 static int ifg_is_dump_node(void *self, ir_node *irn)
1005 {
1006         (void)self;
1007         return !arch_irn_is_ignore(irn);
1008 }
1009
1010 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1011 {
1012         co_ifg_dump_t             *env     = self;
1013         const arch_register_t     *reg     = arch_get_irn_register(irn);
1014         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1015         int                        limited = arch_register_req_is(req, limited);
1016
1017         if(env->flags & CO_IFG_DUMP_LABELS) {
1018                 ir_fprintf(f, "label=\"%+F", irn);
1019
1020                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1021                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1022                         rbitset_copy_to_bitset(req->limited, bs);
1023                         ir_fprintf(f, "\\n%B", bs);
1024                 }
1025                 ir_fprintf(f, "\" ");
1026         } else {
1027                 fprintf(f, "label=\"\" shape=point " );
1028         }
1029
1030         if(env->flags & CO_IFG_DUMP_SHAPE)
1031                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1032
1033         if(env->flags & CO_IFG_DUMP_COLORS)
1034                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1035 }
1036
1037 static void ifg_dump_at_end(FILE *file, void *self)
1038 {
1039         co_ifg_dump_t *env = self;
1040         affinity_node_t *a;
1041
1042         co_gs_foreach_aff_node(env->co, a) {
1043                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1044                 unsigned aidx = get_irn_idx(a->irn);
1045                 neighb_t *n;
1046
1047                 co_gs_foreach_neighb(a, n) {
1048                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1049                         unsigned nidx = get_irn_idx(n->irn);
1050
1051                         if(aidx < nidx) {
1052                                 const char *color = nr == ar ? "blue" : "red";
1053                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1054                                 if(env->flags & CO_IFG_DUMP_LABELS)
1055                                         fprintf(file, "label=\"%d\" ", n->costs);
1056                                 if(env->flags & CO_IFG_DUMP_COLORS)
1057                                         fprintf(file, "color=%s ", color);
1058                                 else
1059                                         fprintf(file, "style=dotted");
1060                                 fprintf(file, "];\n");
1061                         }
1062                 }
1063         }
1064 }
1065
1066
1067 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1068         ifg_is_dump_node,
1069         ifg_dump_graph_attr,
1070         ifg_dump_node_attr,
1071         NULL,
1072         NULL,
1073         ifg_dump_at_end
1074 };
1075
1076
1077
1078 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1079 {
1080         co_ifg_dump_t cod;
1081
1082         cod.co    = co;
1083         cod.flags = flags;
1084         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1085 }
1086
1087
1088 void co_solve_park_moon(copy_opt_t *opt)
1089 {
1090         (void) opt;
1091 }
1092
1093 /*
1094     __  __       _         ____       _
1095    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1096    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1097    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1098    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1099
1100 */
1101
1102 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1103 {
1104         FILE *result;
1105         char buf[1024];
1106         size_t i, n;
1107         char *tu_name;
1108
1109         n = strlen(env->birg->main_env->cup_name);
1110         tu_name = XMALLOCN(char, n + 1);
1111         strcpy(tu_name, env->birg->main_env->cup_name);
1112         for (i = 0; i < n; ++i)
1113                 if (tu_name[i] == '.')
1114                         tu_name[i] = '_';
1115
1116
1117         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1118         xfree(tu_name);
1119         result = fopen(buf, "wt");
1120         if(result == NULL) {
1121                 panic("Couldn't open '%s' for writing.", buf);
1122         }
1123
1124         return result;
1125 }
1126
1127 void co_driver(be_chordal_env_t *cenv)
1128 {
1129         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1130         co_complete_stats_t before, after;
1131         copy_opt_t          *co;
1132         int                 was_optimal = 0;
1133
1134         assert(selected_copyopt);
1135
1136         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1137
1138         co = new_copy_opt(cenv, cost_func);
1139         co_build_ou_structure(co);
1140         co_build_graph_structure(co);
1141
1142         co_complete_stats(co, &before);
1143
1144         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1145         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1146         be_stat_ev_ull("co_max_costs",    before.max_costs);
1147         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1148         be_stat_ev_ull("co_aff_int",      before.aff_int);
1149
1150         be_stat_ev_ull("co_init_costs",   before.costs);
1151         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1152
1153         if (dump_flags & DUMP_BEFORE) {
1154                 FILE *f = my_open(cenv, "", "-before.dot");
1155                 co_dump_ifg_dot(co, f, style_flags);
1156                 fclose(f);
1157         }
1158
1159         /* if the algo can improve results, provide an initial solution with heur1 */
1160         if (improve && selected_copyopt->can_improve_existing) {
1161                 co_complete_stats_t stats;
1162
1163                 /* produce a heuristic solution */
1164                 co_solve_heuristic(co);
1165
1166                 /* do the stats and provide the current costs */
1167                 co_complete_stats(co, &stats);
1168                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1169         }
1170
1171         /* perform actual copy minimization */
1172         ir_timer_reset_and_start(timer);
1173         was_optimal = selected_copyopt->copyopt(co);
1174         ir_timer_stop(timer);
1175
1176         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1177         be_stat_ev_ull("co_optimal", was_optimal);
1178
1179         if (dump_flags & DUMP_AFTER) {
1180                 FILE *f = my_open(cenv, "", "-after.dot");
1181                 co_dump_ifg_dot(co, f, style_flags);
1182                 fclose(f);
1183         }
1184
1185         co_complete_stats(co, &after);
1186
1187         if (do_stats) {
1188                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1189                 ulong64 evitable          = after.costs     - after.inevit_costs;
1190
1191                 ir_printf("%30F ", cenv->irg);
1192                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1193
1194                 if(optimizable_costs > 0)
1195                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1196                 else
1197                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1198         }
1199
1200         /* Dump the interference graph in Appel's format. */
1201         if (dump_flags & DUMP_APPEL) {
1202                 FILE *f = my_open(cenv, "", ".apl");
1203                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1204                 co_dump_appel_graph(co, f);
1205                 fclose(f);
1206         }
1207
1208         be_stat_ev_ull("co_after_costs", after.costs);
1209         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1210
1211         co_free_graph_structure(co);
1212         co_free_ou_structure(co);
1213         free_copy_opt(co);
1214 }