Another rewrite of prolog/epilog handling: Delay their creation until after register...
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irphase_t.h"
47 #include "irprintf_t.h"
48
49 #include "bemodule.h"
50 #include "bearch.h"
51 #include "benode.h"
52 #include "beutil.h"
53 #include "beifg.h"
54 #include "beintlive_t.h"
55 #include "becopyopt_t.h"
56 #include "becopystat.h"
57 #include "belive_t.h"
58 #include "beinsn_t.h"
59 #include "besched.h"
60 #include "bestatevent.h"
61 #include "beirg.h"
62 #include "error.h"
63
64 #include "lc_opts.h"
65 #include "lc_opts_enum.h"
66
67 #define DUMP_BEFORE 1
68 #define DUMP_AFTER  2
69 #define DUMP_APPEL  4
70 #define DUMP_ALL    2 * DUMP_APPEL - 1
71
72 #define COST_FUNC_FREQ     1
73 #define COST_FUNC_LOOP     2
74 #define COST_FUNC_ALL_ONE  3
75
76 static unsigned   dump_flags  = 0;
77 static unsigned   style_flags = 0;
78 static unsigned   do_stats    = 0;
79 static cost_fct_t cost_func   = co_get_costs_exec_freq;
80 static int        improve     = 1;
81
82 static const lc_opt_enum_mask_items_t dump_items[] = {
83         { "before",  DUMP_BEFORE },
84         { "after",   DUMP_AFTER  },
85         { "appel",   DUMP_APPEL  },
86         { "all",     DUMP_ALL    },
87         { NULL,      0 }
88 };
89
90 static const lc_opt_enum_mask_items_t style_items[] = {
91         { "color",   CO_IFG_DUMP_COLORS },
92         { "labels",  CO_IFG_DUMP_LABELS },
93         { "constr",  CO_IFG_DUMP_CONSTR },
94         { "shape",   CO_IFG_DUMP_SHAPE  },
95         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
96         { NULL,      0 }
97 };
98
99 typedef int (*opt_funcptr)(void);
100
101 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
102         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
103         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
104         { "one",    (opt_funcptr) co_get_costs_all_one },
105         { NULL,     NULL }
106 };
107
108 static lc_opt_enum_mask_var_t dump_var = {
109         &dump_flags, dump_items
110 };
111
112 static lc_opt_enum_mask_var_t style_var = {
113         &style_flags, style_items
114 };
115
116 static lc_opt_enum_func_ptr_var_t cost_func_var = {
117         (opt_funcptr*) &cost_func, cost_func_items
118 };
119
120 static const lc_opt_table_entry_t options[] = {
121         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
122         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
123         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
124         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
125         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
126         LC_OPT_LAST
127 };
128
129 static be_module_list_entry_t *copyopts = NULL;
130 static const co_algo_info *selected_copyopt = NULL;
131
132 void be_register_copyopt(const char *name, co_algo_info *copyopt)
133 {
134         if (selected_copyopt == NULL)
135                 selected_copyopt = copyopt;
136         be_add_module_to_list(&copyopts, name, copyopt);
137 }
138
139 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt);
140 void be_init_copyopt(void)
141 {
142         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
143         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
144         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
145         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
146
147         lc_opt_add_table(co_grp, options);
148         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
149                                        &copyopts, (void**) &selected_copyopt);
150 }
151
152 static int void_algo(copy_opt_t *co)
153 {
154         (void) co;
155         return 0;
156 }
157
158 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone);
159 void be_init_copynone(void)
160 {
161         static co_algo_info copyheur = {
162                 void_algo, 0
163         };
164
165         be_register_copyopt("none", &copyheur);
166 }
167
168 #undef QUICK_AND_DIRTY_HACK
169
170 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
171 {
172         if (env->ifg)
173                 return be_ifg_connected(env->ifg, a, b);
174         else {
175                 be_lv_t *lv = be_get_irg_liveness(env->irg);
176                 return be_values_interfere(lv, a, b);
177         }
178 }
179
180
181 /******************************************************************************
182     _____                           _
183    / ____|                         | |
184   | |  __  ___ _ __   ___ _ __ __ _| |
185   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
186   | |__| |  __/ | | |  __/ | | (_| | |
187    \_____|\___|_| |_|\___|_|  \__,_|_|
188
189  ******************************************************************************/
190
191 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
192
193
194 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
195 {
196         const char *s1, *s2, *s3;
197         size_t len;
198         copy_opt_t *co;
199
200         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
201
202         co = XMALLOCZ(copy_opt_t);
203         co->cenv      = chordal_env;
204         co->irg       = chordal_env->irg;
205         co->cls       = chordal_env->cls;
206         co->get_costs = get_costs;
207
208         s1 = get_irp_name();
209         s2 = get_entity_name(get_irg_entity(co->irg));
210         s3 = chordal_env->cls->name;
211         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
212         co->name = XMALLOCN(char, len);
213         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
214
215         return co;
216 }
217
218 void free_copy_opt(copy_opt_t *co)
219 {
220         xfree(co->name);
221         free(co);
222 }
223
224 /**
225  * Checks if a node is optimizable, viz. has something to do with coalescing
226  * @param irn  The irn to check
227  */
228 static int co_is_optimizable_root(ir_node *irn)
229 {
230         const arch_register_req_t *req;
231
232         if (arch_irn_is_ignore(irn))
233                 return 0;
234
235         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
236                 return 1;
237
238         req = arch_get_register_req_out(irn);
239         if (is_2addr_code(req))
240                 return 1;
241
242         return 0;
243 }
244
245 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
246 {
247         int cost = 0;
248         ir_loop *loop;
249         ir_node *root_block = get_nodes_block(root);
250         (void) co;
251         (void) arg;
252
253         if (is_Phi(root)) {
254                 /* for phis the copies are placed in the corresponding pred-block */
255                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
256         } else {
257                 /* a perm places the copy in the same block as it resides */
258                 loop = get_irn_loop(root_block);
259         }
260         if (loop) {
261                 int d = get_loop_depth(loop);
262                 cost = d*d;
263         }
264         return 1+cost;
265 }
266
267 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
268 {
269         int res;
270         ir_node *root_bl = get_nodes_block(root);
271         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
272         ir_exec_freq *exec_freq = be_get_irg_exec_freq(co->cenv->irg);
273         (void) arg;
274         res = get_block_execfreq_ulong(exec_freq, copy_bl);
275
276         /* don't allow values smaller than one. */
277         return res < 1 ? 1 : res;
278 }
279
280
281 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos)
282 {
283         (void) co;
284         (void) root;
285         (void) arg;
286         (void) pos;
287         return 1;
288 }
289
290 /******************************************************************************
291    ____        _   _    _       _ _          _____ _
292   / __ \      | | | |  | |     (_) |        / ____| |
293  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
294  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
295  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
296   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
297         | |                                                            __/ |
298         |_|                                                           |___/
299  ******************************************************************************/
300
301 /**
302  * Determines a maximum weighted independent set with respect to
303  * the interference and conflict edges of all nodes in a qnode.
304  */
305 static int ou_max_ind_set_costs(unit_t *ou)
306 {
307         be_chordal_env_t *chordal_env = ou->co->cenv;
308         ir_node **safe, **unsafe;
309         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
310         bitset_t *curr;
311         size_t  pos;
312         int curr_weight, best_weight = 0;
313
314         /* assign the nodes into two groups.
315          * safe: node has no interference, hence it is in every max stable set.
316          * unsafe: node has an interference
317          */
318         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
319         safe_costs   = 0;
320         safe_count   = 0;
321         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
322         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
323         unsafe_count = 0;
324         for (i=1; i<ou->node_count; ++i) {
325                 int is_safe = 1;
326                 for (o=1; o<ou->node_count; ++o) {
327                         if (i==o)
328                                 continue;
329                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
330                                 unsafe_costs[unsafe_count] = ou->costs[i];
331                                 unsafe[unsafe_count] = ou->nodes[i];
332                                 ++unsafe_count;
333                                 is_safe = 0;
334                                 break;
335                         }
336                 }
337                 if (is_safe) {
338                         safe_costs += ou->costs[i];
339                         safe[safe_count++] = ou->nodes[i];
340                 }
341         }
342
343
344         /* now compute the best set out of the unsafe nodes*/
345         if (unsafe_count > MIS_HEUR_TRIGGER) {
346                 bitset_t *best = bitset_alloca(unsafe_count);
347                 /* Heuristic: Greedy trial and error form index 0 to unsafe_count-1 */
348                 for (i=0; i<unsafe_count; ++i) {
349                         bitset_set(best, i);
350                         /* check if it is a stable set */
351                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
352                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
353                                         bitset_clear(best, i); /* clear the bit and try next one */
354                                         break;
355                                 }
356                 }
357                 /* compute the weight */
358                 bitset_foreach(best, pos)
359                         best_weight += unsafe_costs[pos];
360         } else {
361                 /* Exact Algorithm: Brute force */
362                 curr = bitset_alloca(unsafe_count);
363                 bitset_set_all(curr);
364                 while (bitset_popcount(curr) != 0) {
365                         /* check if curr is a stable set */
366                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
367                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
368                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
369                                                         goto no_stable_set;
370
371                         /* if we arrive here, we have a stable set */
372                         /* compute the weight of the stable set*/
373                         curr_weight = 0;
374                         bitset_foreach(curr, pos)
375                                 curr_weight += unsafe_costs[pos];
376
377                         /* any better ? */
378                         if (curr_weight > best_weight) {
379                                 best_weight = curr_weight;
380                         }
381
382         no_stable_set:
383                         bitset_minus1(curr);
384                 }
385         }
386
387         return safe_costs+best_weight;
388 }
389
390 static void co_collect_units(ir_node *irn, void *env)
391 {
392         const arch_register_req_t *req;
393         copy_opt_t                *co  = (copy_opt_t*)env;
394         unit_t *unit;
395
396         if (get_irn_mode(irn) == mode_T)
397                 return;
398         req = arch_get_register_req_out(irn);
399         if (req->cls != co->cls)
400                 return;
401         if (!co_is_optimizable_root(irn))
402                 return;
403
404         /* Init a new unit */
405         unit = XMALLOCZ(unit_t);
406         unit->co = co;
407         unit->node_count = 1;
408         INIT_LIST_HEAD(&unit->queue);
409
410         /* Phi with some/all of its arguments */
411         if (is_Reg_Phi(irn)) {
412                 int i, arity;
413
414                 /* init */
415                 arity = get_irn_arity(irn);
416                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
417                 unit->costs = XMALLOCN(int,      arity + 1);
418                 unit->nodes[0] = irn;
419
420                 /* fill */
421                 for (i=0; i<arity; ++i) {
422                         int o, arg_pos;
423                         ir_node *arg = get_irn_n(irn, i);
424
425                         assert(arch_get_irn_reg_class_out(arg) == co->cls && "Argument not in same register class.");
426                         if (arg == irn)
427                                 continue;
428                         if (nodes_interfere(co->cenv, irn, arg)) {
429                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
430                                 continue;
431                         }
432
433                         /* Else insert the argument of the phi to the members of this ou */
434                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
435
436                         if (arch_irn_is_ignore(arg))
437                                 continue;
438
439                         /* Check if arg has occurred at a prior position in the arg/list */
440                         arg_pos = 0;
441                         for (o=1; o<unit->node_count; ++o) {
442                                 if (unit->nodes[o] == arg) {
443                                         arg_pos = o;
444                                         break;
445                                 }
446                         }
447
448                         if (!arg_pos) { /* a new argument */
449                                 /* insert node, set costs */
450                                 unit->nodes[unit->node_count] = arg;
451                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
452                                 unit->node_count++;
453                         } else { /* arg has occurred before in same phi */
454                                 /* increase costs for existing arg */
455                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
456                         }
457                 }
458                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
459                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
460         } else if (is_Perm_Proj(irn)) {
461                 /* Proj of a perm with corresponding arg */
462                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
463                 unit->nodes = XMALLOCN(ir_node*, 2);
464                 unit->costs = XMALLOCN(int,      2);
465                 unit->node_count = 2;
466                 unit->nodes[0] = irn;
467                 unit->nodes[1] = get_Perm_src(irn);
468                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
469         } else {
470                 /* Src == Tgt of a 2-addr-code instruction */
471                 if (is_2addr_code(req)) {
472                         const unsigned other = req->other_same;
473                         int            count = 0;
474                         int            i;
475
476                         for (i = 0; (1U << i) <= other; ++i) {
477                                 if (other & (1U << i)) {
478                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
479                                         if (arch_irn_is_ignore(o))
480                                                 continue;
481                                         if (nodes_interfere(co->cenv, irn, o))
482                                                 continue;
483                                         ++count;
484                                 }
485                         }
486
487                         if (count != 0) {
488                                 int k = 0;
489                                 ++count;
490                                 unit->nodes = XMALLOCN(ir_node*, count);
491                                 unit->costs = XMALLOCN(int,      count);
492                                 unit->node_count = count;
493                                 unit->nodes[k++] = irn;
494
495                                 for (i = 0; 1U << i <= other; ++i) {
496                                         if (other & (1U << i)) {
497                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
498                                                 if (!arch_irn_is_ignore(o) &&
499                                                                 !nodes_interfere(co->cenv, irn, o)) {
500                                                         unit->nodes[k] = o;
501                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
502                                                         ++k;
503                                                 }
504                                         }
505                                 }
506                         }
507                 } else {
508                         assert(0 && "This is not an optimizable node!");
509                 }
510         }
511
512         /* Insert the new unit at a position according to its costs */
513         if (unit->node_count > 1) {
514                 int i;
515                 struct list_head *tmp;
516
517                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
518                 for (i=1; i<unit->node_count; ++i) {
519                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
520                         unit->all_nodes_costs += unit->costs[i];
521                 }
522
523                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
524                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
525                 /* Insert the new ou according to its sort_key */
526                 tmp = &co->units;
527                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
528                         tmp = tmp->next;
529                 list_add(&unit->units, tmp);
530         } else {
531                 free(unit);
532         }
533 }
534
535 #ifdef QUICK_AND_DIRTY_HACK
536
537 static int compare_ous(const void *k1, const void *k2)
538 {
539         const unit_t *u1 = *((const unit_t **) k1);
540         const unit_t *u2 = *((const unit_t **) k2);
541         int i, o, u1_has_constr, u2_has_constr;
542         arch_register_req_t req;
543
544         /* Units with constraints come first */
545         u1_has_constr = 0;
546         for (i=0; i<u1->node_count; ++i) {
547                 arch_get_register_req_out(&req, u1->nodes[i]);
548                 if (arch_register_req_is(&req, limited)) {
549                         u1_has_constr = 1;
550                         break;
551                 }
552         }
553
554         u2_has_constr = 0;
555         for (i=0; i<u2->node_count; ++i) {
556                 arch_get_register_req_out(&req, u2->nodes[i]);
557                 if (arch_register_req_is(&req, limited)) {
558                         u2_has_constr = 1;
559                         break;
560                 }
561         }
562
563         if (u1_has_constr != u2_has_constr)
564                 return u2_has_constr - u1_has_constr;
565
566         /* Now check, whether the two units are connected */
567 #if 0
568         for (i=0; i<u1->node_count; ++i)
569                 for (o=0; o<u2->node_count; ++o)
570                         if (u1->nodes[i] == u2->nodes[o])
571                                 return 0;
572 #endif
573
574         /* After all, the sort key decides. Greater keys come first. */
575         return u2->sort_key - u1->sort_key;
576
577 }
578
579 /**
580  * Sort the ou's according to constraints and their sort_key
581  */
582 static void co_sort_units(copy_opt_t *co)
583 {
584         int i, count = 0, costs;
585         unit_t *ou, **ous;
586
587         /* get the number of ous, remove them form the list and fill the array */
588         list_for_each_entry(unit_t, ou, &co->units, units)
589                 count++;
590         ous = ALLOCAN(unit_t, count);
591
592         costs = co_get_max_copy_costs(co);
593
594         i = 0;
595         list_for_each_entry(unit_t, ou, &co->units, units)
596                 ous[i++] = ou;
597
598         INIT_LIST_HEAD(&co->units);
599
600         assert(count == i && list_empty(&co->units));
601
602         for (i=0; i<count; ++i)
603                 ir_printf("%+F\n", ous[i]->nodes[0]);
604
605         qsort(ous, count, sizeof(*ous), compare_ous);
606
607         ir_printf("\n\n");
608         for (i=0; i<count; ++i)
609                 ir_printf("%+F\n", ous[i]->nodes[0]);
610
611         /* reinsert into list in correct order */
612         for (i=0; i<count; ++i)
613                 list_add_tail(&ous[i]->units, &co->units);
614
615         assert(costs == co_get_max_copy_costs(co));
616 }
617 #endif
618
619 void co_build_ou_structure(copy_opt_t *co)
620 {
621         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
622         INIT_LIST_HEAD(&co->units);
623         irg_walk_graph(co->irg, co_collect_units, NULL, co);
624 #ifdef QUICK_AND_DIRTY_HACK
625         co_sort_units(co);
626 #endif
627 }
628
629 void co_free_ou_structure(copy_opt_t *co)
630 {
631         unit_t *curr, *tmp;
632         ASSERT_OU_AVAIL(co);
633         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
634                 xfree(curr->nodes);
635                 xfree(curr->costs);
636                 xfree(curr);
637         }
638         co->units.next = NULL;
639 }
640
641 /* co_solve_heuristic() is implemented in becopyheur.c */
642
643 int co_get_max_copy_costs(const copy_opt_t *co)
644 {
645         int i, res = 0;
646         unit_t *curr;
647
648         ASSERT_OU_AVAIL(co);
649
650         list_for_each_entry(unit_t, curr, &co->units, units) {
651                 res += curr->inevitable_costs;
652                 for (i=1; i<curr->node_count; ++i)
653                         res += curr->costs[i];
654         }
655         return res;
656 }
657
658 int co_get_inevit_copy_costs(const copy_opt_t *co)
659 {
660         int res = 0;
661         unit_t *curr;
662
663         ASSERT_OU_AVAIL(co);
664
665         list_for_each_entry(unit_t, curr, &co->units, units)
666                 res += curr->inevitable_costs;
667         return res;
668 }
669
670 int co_get_copy_costs(const copy_opt_t *co)
671 {
672         int i, res = 0;
673         unit_t *curr;
674
675         ASSERT_OU_AVAIL(co);
676
677         list_for_each_entry(unit_t, curr, &co->units, units) {
678                 int root_col = get_irn_col(curr->nodes[0]);
679                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
680                 res += curr->inevitable_costs;
681                 for (i=1; i<curr->node_count; ++i) {
682                         int arg_col = get_irn_col(curr->nodes[i]);
683                         if (root_col != arg_col) {
684                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
685                                 res += curr->costs[i];
686                         }
687                 }
688         }
689         return res;
690 }
691
692 int co_get_lower_bound(const copy_opt_t *co)
693 {
694         int res = 0;
695         unit_t *curr;
696
697         ASSERT_OU_AVAIL(co);
698
699         list_for_each_entry(unit_t, curr, &co->units, units)
700                 res += curr->inevitable_costs + curr->min_nodes_costs;
701         return res;
702 }
703
704 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
705 {
706         bitset_t *seen = bitset_irg_malloc(co->irg);
707         affinity_node_t *an;
708
709         memset(stat, 0, sizeof(stat[0]));
710
711         /* count affinity edges. */
712         co_gs_foreach_aff_node(co, an) {
713                 neighb_t *neigh;
714                 stat->aff_nodes += 1;
715                 bitset_add_irn(seen, an->irn);
716                 co_gs_foreach_neighb(an, neigh) {
717                         if (!bitset_contains_irn(seen, neigh->irn)) {
718                                 stat->aff_edges += 1;
719                                 stat->max_costs += neigh->costs;
720
721                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
722                                         stat->costs += neigh->costs;
723                                         stat->unsatisfied_edges += 1;
724                                 }
725
726                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
727                                         stat->aff_int += 1;
728                                         stat->inevit_costs += neigh->costs;
729                                 }
730
731                         }
732                 }
733         }
734
735         bitset_free(seen);
736 }
737
738 /******************************************************************************
739    _____                 _        _____ _
740   / ____|               | |      / ____| |
741  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
742  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
743  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
744   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
745                   | |                                       __/ |
746                   |_|                                      |___/
747  ******************************************************************************/
748
749 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
750 {
751         const affinity_node_t *n1 = (const affinity_node_t*)k1;
752         const affinity_node_t *n2 = (const affinity_node_t*)k2;
753         (void) size;
754
755         return (n1->irn != n2->irn);
756 }
757
758 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
759 {
760         affinity_node_t new_node, *node;
761         neighb_t        *nbr;
762         int             allocnew = 1;
763
764         new_node.irn        = n1;
765         new_node.degree     = 0;
766         new_node.neighbours = NULL;
767         node = (affinity_node_t*)set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
768
769         for (nbr = node->neighbours; nbr; nbr = nbr->next)
770                 if (nbr->irn == n2) {
771                         allocnew = 0;
772                         break;
773                 }
774
775         /* if we did not find n2 in n1's neighbourhood insert it */
776         if (allocnew) {
777                 nbr        = OALLOC(&co->obst, neighb_t);
778                 nbr->irn   = n2;
779                 nbr->costs = 0;
780                 nbr->next  = node->neighbours;
781
782                 node->neighbours = nbr;
783                 node->degree++;
784         }
785
786         /* now nbr points to n1's neighbour-entry of n2 */
787         nbr->costs += costs;
788 }
789
790 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
791 {
792         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
793                 add_edge(co, n1, n2, costs);
794                 add_edge(co, n2, n1, costs);
795         }
796 }
797
798 static void build_graph_walker(ir_node *irn, void *env)
799 {
800         const arch_register_req_t *req;
801         copy_opt_t                *co  = (copy_opt_t*)env;
802         int pos, max;
803
804         if (get_irn_mode(irn) == mode_T)
805                 return;
806         req = arch_get_register_req_out(irn);
807         if (req->cls != co->cls || arch_irn_is_ignore(irn))
808                 return;
809
810         if (is_Reg_Phi(irn)) { /* Phis */
811                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
812                         ir_node *arg = get_irn_n(irn, pos);
813                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
814                 }
815         } else if (is_Perm_Proj(irn)) { /* Perms */
816                 ir_node *arg = get_Perm_src(irn);
817                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
818         } else { /* 2-address code */
819                 if (is_2addr_code(req)) {
820                         const unsigned other = req->other_same;
821                         int i;
822
823                         for (i = 0; 1U << i <= other; ++i) {
824                                 if (other & (1U << i)) {
825                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
826                                         if (!arch_irn_is_ignore(other))
827                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
828                                 }
829                         }
830                 }
831         }
832 }
833
834 void co_build_graph_structure(copy_opt_t *co)
835 {
836         obstack_init(&co->obst);
837         co->nodes = new_set(compare_affinity_node_t, 32);
838
839         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
840 }
841
842 void co_free_graph_structure(copy_opt_t *co)
843 {
844         ASSERT_GS_AVAIL(co);
845
846         del_set(co->nodes);
847         obstack_free(&co->obst, NULL);
848         co->nodes = NULL;
849 }
850
851 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
852 {
853         affinity_node_t new_node, *n;
854
855         ASSERT_GS_AVAIL(co);
856
857         new_node.irn = irn;
858         n = (affinity_node_t*)set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
859         if (n) {
860                 return (n->degree > 0);
861         } else
862                 return 0;
863 }
864
865 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
866 {
867         ir_node *nodes[]  = { a, b };
868         bitset_t *constr[] = { NULL, NULL };
869         int j;
870
871         constr[0] = bitset_alloca(co->cls->n_regs);
872         constr[1] = bitset_alloca(co->cls->n_regs);
873
874         for (j = 0; j < 2; ++j) {
875                 const arch_register_req_t *req = arch_get_register_req_out(nodes[j]);
876                 if (arch_register_req_is(req, limited))
877                         rbitset_copy_to_bitset(req->limited, constr[j]);
878                 else
879                         bitset_set_all(constr[j]);
880
881         }
882
883         return !bitset_intersect(constr[0], constr[1]);
884 }
885
886 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
887 {
888         be_ifg_t *ifg       = co->cenv->ifg;
889         int      *color_map = ALLOCAN(int, co->cls->n_regs);
890         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
891         ir_graph *irg       = co->irg;
892         be_irg_t *birg      = be_birg_from_irg(irg);
893
894         ir_node *irn;
895         nodes_iter_t it;
896         neighbours_iter_t nit;
897         int n, n_regs;
898         unsigned i;
899
900         n_regs = 0;
901         for (i = 0; i < co->cls->n_regs; ++i) {
902                 const arch_register_t *reg = &co->cls->regs[i];
903                 if (rbitset_is_set(birg->allocatable_regs, reg->global_index)) {
904                         color_map[i] = n_regs++;
905                 } else {
906                         color_map[i] = -1;
907                 }
908         }
909
910         /*
911          * n contains the first node number.
912          * the values below n are the pre-colored register nodes
913          */
914
915         n = n_regs;
916         be_ifg_foreach_node(ifg, &it, irn) {
917                 if (arch_irn_is_ignore(irn))
918                         continue;
919                 node_map[get_irn_idx(irn)] = n++;
920         }
921
922         fprintf(f, "%d %d\n", n, n_regs);
923
924         be_ifg_foreach_node(ifg, &it, irn) {
925                 if (!arch_irn_is_ignore(irn)) {
926                         int idx                        = node_map[get_irn_idx(irn)];
927                         affinity_node_t           *a   = get_affinity_info(co, irn);
928                         const arch_register_req_t *req = arch_get_register_req_out(irn);
929                         ir_node                   *adj;
930
931                         if (arch_register_req_is(req, limited)) {
932                                 for (i = 0; i < co->cls->n_regs; ++i) {
933                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
934                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
935                                 }
936                         }
937
938                         be_ifg_foreach_neighbour(ifg, &nit, irn, adj) {
939                                 if (!arch_irn_is_ignore(adj) &&
940                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
941                                         int adj_idx = node_map[get_irn_idx(adj)];
942                                         if (idx < adj_idx)
943                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
944                                 }
945                         }
946
947                         if (a) {
948                                 neighb_t *n;
949
950                                 co_gs_foreach_neighb(a, n) {
951                                         if (!arch_irn_is_ignore(n->irn)) {
952                                                 int n_idx = node_map[get_irn_idx(n->irn)];
953                                                 if (idx < n_idx)
954                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
955                                         }
956                                 }
957                         }
958                 }
959         }
960
961         xfree(node_map);
962 }
963
964 /*
965          ___ _____ ____   ____   ___ _____   ____                        _
966         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
967          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
968          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
969         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
970                                                                   |_|            |___/
971 */
972
973 static const char *get_dot_color_name(size_t col)
974 {
975         static const char *names[] = {
976                 "blue",
977                 "red",
978                 "green",
979                 "yellow",
980                 "cyan",
981                 "magenta",
982                 "orange",
983                 "chocolate",
984                 "beige",
985                 "navy",
986                 "darkgreen",
987                 "darkred",
988                 "lightPink",
989                 "chartreuse",
990                 "lightskyblue",
991                 "linen",
992                 "pink",
993                 "lightslateblue",
994                 "mintcream",
995                 "red",
996                 "darkolivegreen",
997                 "mediumblue",
998                 "mistyrose",
999                 "salmon",
1000                 "darkseagreen",
1001                 "mediumslateblue"
1002                 "moccasin",
1003                 "tomato",
1004                 "forestgreen",
1005                 "darkturquoise",
1006                 "palevioletred"
1007         };
1008
1009         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1010 }
1011
1012 typedef struct co_ifg_dump_t {
1013         const copy_opt_t *co;
1014         unsigned flags;
1015 } co_ifg_dump_t;
1016
1017 static void ifg_dump_graph_attr(FILE *f, void *self)
1018 {
1019         (void) self;
1020         fprintf(f, "overlap=scale");
1021 }
1022
1023 static int ifg_is_dump_node(void *self, ir_node *irn)
1024 {
1025         (void)self;
1026         return !arch_irn_is_ignore(irn);
1027 }
1028
1029 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1030 {
1031         co_ifg_dump_t             *env     = (co_ifg_dump_t*)self;
1032         const arch_register_t     *reg     = arch_get_irn_register(irn);
1033         const arch_register_req_t *req     = arch_get_register_req_out(irn);
1034         int                        limited = arch_register_req_is(req, limited);
1035
1036         if (env->flags & CO_IFG_DUMP_LABELS) {
1037                 ir_fprintf(f, "label=\"%+F", irn);
1038
1039                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1040                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1041                         rbitset_copy_to_bitset(req->limited, bs);
1042                         ir_fprintf(f, "\\n%B", bs);
1043                 }
1044                 ir_fprintf(f, "\" ");
1045         } else {
1046                 fprintf(f, "label=\"\" shape=point " );
1047         }
1048
1049         if (env->flags & CO_IFG_DUMP_SHAPE)
1050                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1051
1052         if (env->flags & CO_IFG_DUMP_COLORS)
1053                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1054 }
1055
1056 static void ifg_dump_at_end(FILE *file, void *self)
1057 {
1058         co_ifg_dump_t *env = (co_ifg_dump_t*)self;
1059         affinity_node_t *a;
1060
1061         co_gs_foreach_aff_node(env->co, a) {
1062                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1063                 unsigned aidx = get_irn_idx(a->irn);
1064                 neighb_t *n;
1065
1066                 co_gs_foreach_neighb(a, n) {
1067                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1068                         unsigned nidx = get_irn_idx(n->irn);
1069
1070                         if (aidx < nidx) {
1071                                 const char *color = nr == ar ? "blue" : "red";
1072                                 fprintf(file, "\tn%u -- n%u [weight=0.01 ", aidx, nidx);
1073                                 if (env->flags & CO_IFG_DUMP_LABELS)
1074                                         fprintf(file, "label=\"%d\" ", n->costs);
1075                                 if (env->flags & CO_IFG_DUMP_COLORS)
1076                                         fprintf(file, "color=%s ", color);
1077                                 else
1078                                         fprintf(file, "style=dotted");
1079                                 fprintf(file, "];\n");
1080                         }
1081                 }
1082         }
1083 }
1084
1085
1086 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1087         ifg_is_dump_node,
1088         ifg_dump_graph_attr,
1089         ifg_dump_node_attr,
1090         NULL,
1091         NULL,
1092         ifg_dump_at_end
1093 };
1094
1095
1096
1097 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1098 {
1099         co_ifg_dump_t cod;
1100
1101         cod.co    = co;
1102         cod.flags = flags;
1103         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1104 }
1105
1106
1107 void co_solve_park_moon(copy_opt_t *opt)
1108 {
1109         (void) opt;
1110 }
1111
1112 /*
1113     __  __       _         ____       _
1114    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1115    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1116    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1117    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1118
1119 */
1120
1121 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1122 {
1123         FILE *result;
1124         char buf[1024];
1125         size_t i, n;
1126         char *tu_name;
1127         const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
1128
1129         n = strlen(cup_name);
1130         tu_name = XMALLOCN(char, n + 1);
1131         strcpy(tu_name, cup_name);
1132         for (i = 0; i < n; ++i)
1133                 if (tu_name[i] == '.')
1134                         tu_name[i] = '_';
1135
1136
1137         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1138         xfree(tu_name);
1139         result = fopen(buf, "wt");
1140         if (result == NULL) {
1141                 panic("Couldn't open '%s' for writing.", buf);
1142         }
1143
1144         return result;
1145 }
1146
1147 void co_driver(be_chordal_env_t *cenv)
1148 {
1149         ir_timer_t          *timer = ir_timer_new();
1150         co_complete_stats_t before, after;
1151         copy_opt_t          *co;
1152         int                 was_optimal = 0;
1153
1154         assert(selected_copyopt);
1155
1156         /* skip copymin if algo is 'none' */
1157         if (selected_copyopt->copyopt == void_algo)
1158                 return;
1159
1160         be_liveness_assure_chk(be_get_irg_liveness(cenv->irg));
1161
1162         co = new_copy_opt(cenv, cost_func);
1163         co_build_ou_structure(co);
1164         co_build_graph_structure(co);
1165
1166         co_complete_stats(co, &before);
1167
1168         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1169         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1170         be_stat_ev_ull("co_max_costs",    before.max_costs);
1171         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1172         be_stat_ev_ull("co_aff_int",      before.aff_int);
1173
1174         be_stat_ev_ull("co_init_costs",   before.costs);
1175         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1176
1177         if (dump_flags & DUMP_BEFORE) {
1178                 FILE *f = my_open(cenv, "", "-before.dot");
1179                 co_dump_ifg_dot(co, f, style_flags);
1180                 fclose(f);
1181         }
1182
1183         /* if the algo can improve results, provide an initial solution with heur1 */
1184         if (improve && selected_copyopt->can_improve_existing) {
1185                 co_complete_stats_t stats;
1186
1187                 /* produce a heuristic solution */
1188                 co_solve_heuristic(co);
1189
1190                 /* do the stats and provide the current costs */
1191                 co_complete_stats(co, &stats);
1192                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1193         }
1194
1195         /* perform actual copy minimization */
1196         ir_timer_reset_and_start(timer);
1197         was_optimal = selected_copyopt->copyopt(co);
1198         ir_timer_stop(timer);
1199
1200         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1201         be_stat_ev_ull("co_optimal", was_optimal);
1202         ir_timer_free(timer);
1203
1204         if (dump_flags & DUMP_AFTER) {
1205                 FILE *f = my_open(cenv, "", "-after.dot");
1206                 co_dump_ifg_dot(co, f, style_flags);
1207                 fclose(f);
1208         }
1209
1210         co_complete_stats(co, &after);
1211
1212         if (do_stats) {
1213                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1214                 ulong64 evitable          = after.costs     - after.inevit_costs;
1215
1216                 ir_printf("%30F ", cenv->irg);
1217                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1218
1219                 if (optimizable_costs > 0)
1220                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1221                 else
1222                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1223         }
1224
1225         /* Dump the interference graph in Appel's format. */
1226         if (dump_flags & DUMP_APPEL) {
1227                 FILE *f = my_open(cenv, "", ".apl");
1228                 fprintf(f, "# %llu %llu\n", after.costs, after.unsatisfied_edges);
1229                 co_dump_appel_graph(co, f);
1230                 fclose(f);
1231         }
1232
1233         be_stat_ev_ull("co_after_costs", after.costs);
1234         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1235
1236         co_free_graph_structure(co);
1237         co_free_ou_structure(co);
1238         free_copy_opt(co);
1239 }