simply use long long
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #include "config.h"
33
34 #include "execfreq.h"
35 #include "xmalloc.h"
36 #include "debug.h"
37 #include "pmap.h"
38 #include "raw_bitset.h"
39 #include "irnode.h"
40 #include "irgraph.h"
41 #include "irgwalk.h"
42 #include "irprog.h"
43 #include "irloop_t.h"
44 #include "iredges_t.h"
45 #include "irbitset.h"
46 #include "irprintf_t.h"
47 #include "irtools.h"
48 #include "util.h"
49
50 #include "bemodule.h"
51 #include "bearch.h"
52 #include "benode.h"
53 #include "beutil.h"
54 #include "beifg.h"
55 #include "beintlive_t.h"
56 #include "becopyopt_t.h"
57 #include "becopystat.h"
58 #include "belive_t.h"
59 #include "beinsn_t.h"
60 #include "besched.h"
61 #include "bestatevent.h"
62 #include "beirg.h"
63 #include "error.h"
64
65 #include "lc_opts.h"
66 #include "lc_opts_enum.h"
67
68 #define DUMP_BEFORE 1
69 #define DUMP_AFTER  2
70 #define DUMP_APPEL  4
71 #define DUMP_ALL    2 * DUMP_APPEL - 1
72
73 #define COST_FUNC_FREQ     1
74 #define COST_FUNC_LOOP     2
75 #define COST_FUNC_ALL_ONE  3
76
77 static unsigned   dump_flags  = 0;
78 static unsigned   style_flags = 0;
79 static int        do_stats    = 0;
80 static cost_fct_t cost_func   = co_get_costs_exec_freq;
81 static int        improve     = 1;
82
83 static const lc_opt_enum_mask_items_t dump_items[] = {
84         { "before",  DUMP_BEFORE },
85         { "after",   DUMP_AFTER  },
86         { "appel",   DUMP_APPEL  },
87         { "all",     DUMP_ALL    },
88         { NULL,      0 }
89 };
90
91 static const lc_opt_enum_mask_items_t style_items[] = {
92         { "color",   CO_IFG_DUMP_COLORS },
93         { "labels",  CO_IFG_DUMP_LABELS },
94         { "constr",  CO_IFG_DUMP_CONSTR },
95         { "shape",   CO_IFG_DUMP_SHAPE  },
96         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
97         { NULL,      0 }
98 };
99
100 typedef int (*opt_funcptr)(void);
101
102 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
103         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
104         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
105         { "one",    (opt_funcptr) co_get_costs_all_one },
106         { NULL,     NULL }
107 };
108
109 static lc_opt_enum_mask_var_t dump_var = {
110         &dump_flags, dump_items
111 };
112
113 static lc_opt_enum_mask_var_t style_var = {
114         &style_flags, style_items
115 };
116
117 static lc_opt_enum_func_ptr_var_t cost_func_var = {
118         (opt_funcptr*) &cost_func, cost_func_items
119 };
120
121 static const lc_opt_table_entry_t options[] = {
122         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
123         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
124         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
125         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
126         LC_OPT_ENT_BOOL          ("improve", "run heur1 before if algo can exploit start solutions",    &improve),
127         LC_OPT_LAST
128 };
129
130 static be_module_list_entry_t *copyopts = NULL;
131 static const co_algo_info *selected_copyopt = NULL;
132
133 void be_register_copyopt(const char *name, co_algo_info *copyopt)
134 {
135         if (selected_copyopt == NULL)
136                 selected_copyopt = copyopt;
137         be_add_module_to_list(&copyopts, name, copyopt);
138 }
139
140 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copyopt)
141 void be_init_copyopt(void)
142 {
143         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
144         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
145         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
146         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
147
148         lc_opt_add_table(co_grp, options);
149         be_add_module_list_opt(co_grp, "algo", "select copy optimization algo",
150                                        &copyopts, (void**) &selected_copyopt);
151 }
152
153 static int void_algo(copy_opt_t *co)
154 {
155         (void) co;
156         return 0;
157 }
158
159 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copynone)
160 void be_init_copynone(void)
161 {
162         static co_algo_info copyheur = {
163                 void_algo, 0
164         };
165
166         be_register_copyopt("none", &copyheur);
167 }
168
169 #undef QUICK_AND_DIRTY_HACK
170
171 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
172 {
173         if (env->ifg)
174                 return be_ifg_connected(env->ifg, a, b);
175         else {
176                 be_lv_t *lv = be_get_irg_liveness(env->irg);
177                 return be_values_interfere(lv, a, b);
178         }
179 }
180
181
182 /******************************************************************************
183     _____                           _
184    / ____|                         | |
185   | |  __  ___ _ __   ___ _ __ __ _| |
186   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
187   | |__| |  __/ | | |  __/ | | (_| | |
188    \_____|\___|_| |_|\___|_|  \__,_|_|
189
190  ******************************************************************************/
191
192 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
193
194
195 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
196 {
197         const char *s1, *s2, *s3;
198         size_t len;
199         copy_opt_t *co;
200
201         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
202
203         co = XMALLOCZ(copy_opt_t);
204         co->cenv      = chordal_env;
205         co->irg       = chordal_env->irg;
206         co->cls       = chordal_env->cls;
207         co->get_costs = get_costs;
208
209         s1 = get_irp_name();
210         s2 = get_entity_name(get_irg_entity(co->irg));
211         s3 = chordal_env->cls->name;
212         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
213         co->name = XMALLOCN(char, len);
214         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
215
216         return co;
217 }
218
219 void free_copy_opt(copy_opt_t *co)
220 {
221         xfree(co->name);
222         free(co);
223 }
224
225 /**
226  * Checks if a node is optimizable, viz. has something to do with coalescing
227  * @param irn  The irn to check
228  */
229 static int co_is_optimizable_root(ir_node *irn)
230 {
231         const arch_register_req_t *req;
232
233         if (arch_irn_is_ignore(irn))
234                 return 0;
235
236         if (is_Reg_Phi(irn) || is_Perm_Proj(irn))
237                 return 1;
238
239         req = arch_get_irn_register_req(irn);
240         if (is_2addr_code(req))
241                 return 1;
242
243         return 0;
244 }
245
246 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
247 {
248         int cost = 0;
249         ir_loop *loop;
250         ir_node *root_block = get_nodes_block(root);
251         (void) co;
252         (void) arg;
253
254         if (is_Phi(root)) {
255                 /* for phis the copies are placed in the corresponding pred-block */
256                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
257         } else {
258                 /* a perm places the copy in the same block as it resides */
259                 loop = get_irn_loop(root_block);
260         }
261         if (loop) {
262                 int d = get_loop_depth(loop);
263                 cost = d*d;
264         }
265         return 1+cost;
266 }
267
268 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos)
269 {
270         int res;
271         ir_node *root_bl = get_nodes_block(root);
272         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
273         ir_exec_freq *exec_freq = be_get_irg_exec_freq(co->cenv->irg);
274         (void) arg;
275         res = get_block_execfreq_ulong(exec_freq, copy_bl);
276
277         /* don't allow values smaller than one. */
278         return res < 1 ? 1 : res;
279 }
280
281
282 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos)
283 {
284         (void) co;
285         (void) root;
286         (void) arg;
287         (void) pos;
288         return 1;
289 }
290
291 /******************************************************************************
292    ____        _   _    _       _ _          _____ _
293   / __ \      | | | |  | |     (_) |        / ____| |
294  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
295  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
296  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
297   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
298         | |                                                            __/ |
299         |_|                                                           |___/
300  ******************************************************************************/
301
302 /**
303  * Determines a maximum weighted independent set with respect to
304  * the interference and conflict edges of all nodes in a qnode.
305  */
306 static int ou_max_ind_set_costs(unit_t *ou)
307 {
308         be_chordal_env_t *chordal_env = ou->co->cenv;
309         ir_node **safe, **unsafe;
310         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
311         bitset_t *curr;
312         size_t  pos;
313         int curr_weight, best_weight = 0;
314
315         /* assign the nodes into two groups.
316          * safe: node has no interference, hence it is in every max stable set.
317          * unsafe: node has an interference
318          */
319         safe         = ALLOCAN(ir_node*, ou->node_count - 1);
320         safe_costs   = 0;
321         safe_count   = 0;
322         unsafe       = ALLOCAN(ir_node*, ou->node_count - 1);
323         unsafe_costs = ALLOCAN(int,      ou->node_count - 1);
324         unsafe_count = 0;
325         for (i=1; i<ou->node_count; ++i) {
326                 int is_safe = 1;
327                 for (o=1; o<ou->node_count; ++o) {
328                         if (i==o)
329                                 continue;
330                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
331                                 unsafe_costs[unsafe_count] = ou->costs[i];
332                                 unsafe[unsafe_count] = ou->nodes[i];
333                                 ++unsafe_count;
334                                 is_safe = 0;
335                                 break;
336                         }
337                 }
338                 if (is_safe) {
339                         safe_costs += ou->costs[i];
340                         safe[safe_count++] = ou->nodes[i];
341                 }
342         }
343
344
345         /* now compute the best set out of the unsafe nodes*/
346         if (unsafe_count > MIS_HEUR_TRIGGER) {
347                 bitset_t *best = bitset_alloca(unsafe_count);
348                 /* Heuristic: Greedy trial and error form index 0 to unsafe_count-1 */
349                 for (i=0; i<unsafe_count; ++i) {
350                         bitset_set(best, i);
351                         /* check if it is a stable set */
352                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
353                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
354                                         bitset_clear(best, i); /* clear the bit and try next one */
355                                         break;
356                                 }
357                 }
358                 /* compute the weight */
359                 bitset_foreach(best, pos)
360                         best_weight += unsafe_costs[pos];
361         } else {
362                 /* Exact Algorithm: Brute force */
363                 curr = bitset_alloca(unsafe_count);
364                 bitset_set_all(curr);
365                 while (bitset_popcount(curr) != 0) {
366                         /* check if curr is a stable set */
367                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
368                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
369                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
370                                                         goto no_stable_set;
371
372                         /* if we arrive here, we have a stable set */
373                         /* compute the weight of the stable set*/
374                         curr_weight = 0;
375                         bitset_foreach(curr, pos)
376                                 curr_weight += unsafe_costs[pos];
377
378                         /* any better ? */
379                         if (curr_weight > best_weight) {
380                                 best_weight = curr_weight;
381                         }
382
383         no_stable_set:
384                         bitset_minus1(curr);
385                 }
386         }
387
388         return safe_costs+best_weight;
389 }
390
391 static void co_collect_units(ir_node *irn, void *env)
392 {
393         const arch_register_req_t *req;
394         copy_opt_t                *co  = (copy_opt_t*)env;
395         unit_t *unit;
396
397         if (get_irn_mode(irn) == mode_T)
398                 return;
399         req = arch_get_irn_register_req(irn);
400         if (req->cls != co->cls)
401                 return;
402         if (!co_is_optimizable_root(irn))
403                 return;
404
405         /* Init a new unit */
406         unit = XMALLOCZ(unit_t);
407         unit->co = co;
408         unit->node_count = 1;
409         INIT_LIST_HEAD(&unit->queue);
410
411         /* Phi with some/all of its arguments */
412         if (is_Reg_Phi(irn)) {
413                 int i, arity;
414
415                 /* init */
416                 arity = get_irn_arity(irn);
417                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
418                 unit->costs = XMALLOCN(int,      arity + 1);
419                 unit->nodes[0] = irn;
420
421                 /* fill */
422                 for (i=0; i<arity; ++i) {
423                         int o, arg_pos;
424                         ir_node *arg = get_irn_n(irn, i);
425
426                         assert(arch_get_irn_reg_class(arg) == co->cls && "Argument not in same register class.");
427                         if (arg == irn)
428                                 continue;
429                         if (nodes_interfere(co->cenv, irn, arg)) {
430                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
431                                 continue;
432                         }
433
434                         /* Else insert the argument of the phi to the members of this ou */
435                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
436
437                         if (arch_irn_is_ignore(arg))
438                                 continue;
439
440                         /* Check if arg has occurred at a prior position in the arg/list */
441                         arg_pos = 0;
442                         for (o=1; o<unit->node_count; ++o) {
443                                 if (unit->nodes[o] == arg) {
444                                         arg_pos = o;
445                                         break;
446                                 }
447                         }
448
449                         if (!arg_pos) { /* a new argument */
450                                 /* insert node, set costs */
451                                 unit->nodes[unit->node_count] = arg;
452                                 unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
453                                 unit->node_count++;
454                         } else { /* arg has occurred before in same phi */
455                                 /* increase costs for existing arg */
456                                 unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
457                         }
458                 }
459                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
460                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
461         } else if (is_Perm_Proj(irn)) {
462                 /* Proj of a perm with corresponding arg */
463                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
464                 unit->nodes = XMALLOCN(ir_node*, 2);
465                 unit->costs = XMALLOCN(int,      2);
466                 unit->node_count = 2;
467                 unit->nodes[0] = irn;
468                 unit->nodes[1] = get_Perm_src(irn);
469                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
470         } else {
471                 /* Src == Tgt of a 2-addr-code instruction */
472                 if (is_2addr_code(req)) {
473                         const unsigned other = req->other_same;
474                         int            count = 0;
475                         int            i;
476
477                         for (i = 0; (1U << i) <= other; ++i) {
478                                 if (other & (1U << i)) {
479                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
480                                         if (arch_irn_is_ignore(o))
481                                                 continue;
482                                         if (nodes_interfere(co->cenv, irn, o))
483                                                 continue;
484                                         ++count;
485                                 }
486                         }
487
488                         if (count != 0) {
489                                 int k = 0;
490                                 ++count;
491                                 unit->nodes = XMALLOCN(ir_node*, count);
492                                 unit->costs = XMALLOCN(int,      count);
493                                 unit->node_count = count;
494                                 unit->nodes[k++] = irn;
495
496                                 for (i = 0; 1U << i <= other; ++i) {
497                                         if (other & (1U << i)) {
498                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
499                                                 if (!arch_irn_is_ignore(o) &&
500                                                                 !nodes_interfere(co->cenv, irn, o)) {
501                                                         unit->nodes[k] = o;
502                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
503                                                         ++k;
504                                                 }
505                                         }
506                                 }
507                         }
508                 } else {
509                         assert(0 && "This is not an optimizable node!");
510                 }
511         }
512
513         /* Insert the new unit at a position according to its costs */
514         if (unit->node_count > 1) {
515                 int i;
516                 struct list_head *tmp;
517
518                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
519                 for (i=1; i<unit->node_count; ++i) {
520                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
521                         unit->all_nodes_costs += unit->costs[i];
522                 }
523
524                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
525                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
526                 /* Insert the new ou according to its sort_key */
527                 tmp = &co->units;
528                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
529                         tmp = tmp->next;
530                 list_add(&unit->units, tmp);
531         } else {
532                 free(unit);
533         }
534 }
535
536 #ifdef QUICK_AND_DIRTY_HACK
537
538 static int compare_ous(const void *k1, const void *k2)
539 {
540         const unit_t *u1 = *((const unit_t **) k1);
541         const unit_t *u2 = *((const unit_t **) k2);
542         int i, o, u1_has_constr, u2_has_constr;
543         arch_register_req_t req;
544
545         /* Units with constraints come first */
546         u1_has_constr = 0;
547         for (i=0; i<u1->node_count; ++i) {
548                 arch_get_irn_register_req(&req, u1->nodes[i]);
549                 if (arch_register_req_is(&req, limited)) {
550                         u1_has_constr = 1;
551                         break;
552                 }
553         }
554
555         u2_has_constr = 0;
556         for (i=0; i<u2->node_count; ++i) {
557                 arch_get_irn_register_req(&req, u2->nodes[i]);
558                 if (arch_register_req_is(&req, limited)) {
559                         u2_has_constr = 1;
560                         break;
561                 }
562         }
563
564         if (u1_has_constr != u2_has_constr)
565                 return u2_has_constr - u1_has_constr;
566
567         /* Now check, whether the two units are connected */
568 #if 0
569         for (i=0; i<u1->node_count; ++i)
570                 for (o=0; o<u2->node_count; ++o)
571                         if (u1->nodes[i] == u2->nodes[o])
572                                 return 0;
573 #endif
574
575         /* After all, the sort key decides. Greater keys come first. */
576         return u2->sort_key - u1->sort_key;
577
578 }
579
580 /**
581  * Sort the ou's according to constraints and their sort_key
582  */
583 static void co_sort_units(copy_opt_t *co)
584 {
585         int i, count = 0, costs;
586         unit_t *ou, **ous;
587
588         /* get the number of ous, remove them form the list and fill the array */
589         list_for_each_entry(unit_t, ou, &co->units, units)
590                 count++;
591         ous = ALLOCAN(unit_t, count);
592
593         costs = co_get_max_copy_costs(co);
594
595         i = 0;
596         list_for_each_entry(unit_t, ou, &co->units, units)
597                 ous[i++] = ou;
598
599         INIT_LIST_HEAD(&co->units);
600
601         assert(count == i && list_empty(&co->units));
602
603         for (i=0; i<count; ++i)
604                 ir_printf("%+F\n", ous[i]->nodes[0]);
605
606         qsort(ous, count, sizeof(*ous), compare_ous);
607
608         ir_printf("\n\n");
609         for (i=0; i<count; ++i)
610                 ir_printf("%+F\n", ous[i]->nodes[0]);
611
612         /* reinsert into list in correct order */
613         for (i=0; i<count; ++i)
614                 list_add_tail(&ous[i]->units, &co->units);
615
616         assert(costs == co_get_max_copy_costs(co));
617 }
618 #endif
619
620 void co_build_ou_structure(copy_opt_t *co)
621 {
622         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
623         INIT_LIST_HEAD(&co->units);
624         irg_walk_graph(co->irg, co_collect_units, NULL, co);
625 #ifdef QUICK_AND_DIRTY_HACK
626         co_sort_units(co);
627 #endif
628 }
629
630 void co_free_ou_structure(copy_opt_t *co)
631 {
632         unit_t *curr, *tmp;
633         ASSERT_OU_AVAIL(co);
634         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
635                 xfree(curr->nodes);
636                 xfree(curr->costs);
637                 xfree(curr);
638         }
639         co->units.next = NULL;
640 }
641
642 /* co_solve_heuristic() is implemented in becopyheur.c */
643
644 int co_get_max_copy_costs(const copy_opt_t *co)
645 {
646         int i, res = 0;
647         unit_t *curr;
648
649         ASSERT_OU_AVAIL(co);
650
651         list_for_each_entry(unit_t, curr, &co->units, units) {
652                 res += curr->inevitable_costs;
653                 for (i=1; i<curr->node_count; ++i)
654                         res += curr->costs[i];
655         }
656         return res;
657 }
658
659 int co_get_inevit_copy_costs(const copy_opt_t *co)
660 {
661         int res = 0;
662         unit_t *curr;
663
664         ASSERT_OU_AVAIL(co);
665
666         list_for_each_entry(unit_t, curr, &co->units, units)
667                 res += curr->inevitable_costs;
668         return res;
669 }
670
671 int co_get_copy_costs(const copy_opt_t *co)
672 {
673         int i, res = 0;
674         unit_t *curr;
675
676         ASSERT_OU_AVAIL(co);
677
678         list_for_each_entry(unit_t, curr, &co->units, units) {
679                 int root_col = get_irn_col(curr->nodes[0]);
680                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
681                 res += curr->inevitable_costs;
682                 for (i=1; i<curr->node_count; ++i) {
683                         int arg_col = get_irn_col(curr->nodes[i]);
684                         if (root_col != arg_col) {
685                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
686                                 res += curr->costs[i];
687                         }
688                 }
689         }
690         return res;
691 }
692
693 int co_get_lower_bound(const copy_opt_t *co)
694 {
695         int res = 0;
696         unit_t *curr;
697
698         ASSERT_OU_AVAIL(co);
699
700         list_for_each_entry(unit_t, curr, &co->units, units)
701                 res += curr->inevitable_costs + curr->min_nodes_costs;
702         return res;
703 }
704
705 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
706 {
707         bitset_t *seen = bitset_irg_malloc(co->irg);
708         affinity_node_t *an;
709
710         memset(stat, 0, sizeof(stat[0]));
711
712         /* count affinity edges. */
713         co_gs_foreach_aff_node(co, an) {
714                 neighb_t *neigh;
715                 stat->aff_nodes += 1;
716                 bitset_add_irn(seen, an->irn);
717                 co_gs_foreach_neighb(an, neigh) {
718                         if (!bitset_contains_irn(seen, neigh->irn)) {
719                                 stat->aff_edges += 1;
720                                 stat->max_costs += neigh->costs;
721
722                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
723                                         stat->costs += neigh->costs;
724                                         stat->unsatisfied_edges += 1;
725                                 }
726
727                                 if (nodes_interfere(co->cenv, an->irn, neigh->irn)) {
728                                         stat->aff_int += 1;
729                                         stat->inevit_costs += neigh->costs;
730                                 }
731
732                         }
733                 }
734         }
735
736         bitset_free(seen);
737 }
738
739 /******************************************************************************
740    _____                 _        _____ _
741   / ____|               | |      / ____| |
742  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
743  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
744  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
745   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
746                   | |                                       __/ |
747                   |_|                                      |___/
748  ******************************************************************************/
749
750 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size)
751 {
752         const affinity_node_t *n1 = (const affinity_node_t*)k1;
753         const affinity_node_t *n2 = (const affinity_node_t*)k2;
754         (void) size;
755
756         return (n1->irn != n2->irn);
757 }
758
759 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
760 {
761         affinity_node_t new_node, *node;
762         neighb_t        *nbr;
763         int             allocnew = 1;
764
765         new_node.irn        = n1;
766         new_node.degree     = 0;
767         new_node.neighbours = NULL;
768         node = (affinity_node_t*)set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
769
770         for (nbr = node->neighbours; nbr; nbr = nbr->next)
771                 if (nbr->irn == n2) {
772                         allocnew = 0;
773                         break;
774                 }
775
776         /* if we did not find n2 in n1's neighbourhood insert it */
777         if (allocnew) {
778                 nbr        = OALLOC(&co->obst, neighb_t);
779                 nbr->irn   = n2;
780                 nbr->costs = 0;
781                 nbr->next  = node->neighbours;
782
783                 node->neighbours = nbr;
784                 node->degree++;
785         }
786
787         /* now nbr points to n1's neighbour-entry of n2 */
788         nbr->costs += costs;
789 }
790
791 static inline void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs)
792 {
793         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
794                 add_edge(co, n1, n2, costs);
795                 add_edge(co, n2, n1, costs);
796         }
797 }
798
799 static void build_graph_walker(ir_node *irn, void *env)
800 {
801         const arch_register_req_t *req;
802         copy_opt_t                *co  = (copy_opt_t*)env;
803         int pos, max;
804
805         if (get_irn_mode(irn) == mode_T)
806                 return;
807         req = arch_get_irn_register_req(irn);
808         if (req->cls != co->cls || arch_irn_is_ignore(irn))
809                 return;
810
811         if (is_Reg_Phi(irn)) { /* Phis */
812                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
813                         ir_node *arg = get_irn_n(irn, pos);
814                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
815                 }
816         } else if (is_Perm_Proj(irn)) { /* Perms */
817                 ir_node *arg = get_Perm_src(irn);
818                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
819         } else { /* 2-address code */
820                 if (is_2addr_code(req)) {
821                         const unsigned other = req->other_same;
822                         int i;
823
824                         for (i = 0; 1U << i <= other; ++i) {
825                                 if (other & (1U << i)) {
826                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
827                                         if (!arch_irn_is_ignore(other))
828                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
829                                 }
830                         }
831                 }
832         }
833 }
834
835 void co_build_graph_structure(copy_opt_t *co)
836 {
837         obstack_init(&co->obst);
838         co->nodes = new_set(compare_affinity_node_t, 32);
839
840         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
841 }
842
843 void co_free_graph_structure(copy_opt_t *co)
844 {
845         ASSERT_GS_AVAIL(co);
846
847         del_set(co->nodes);
848         obstack_free(&co->obst, NULL);
849         co->nodes = NULL;
850 }
851
852 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn)
853 {
854         affinity_node_t new_node, *n;
855
856         ASSERT_GS_AVAIL(co);
857
858         new_node.irn = irn;
859         n = (affinity_node_t*)set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
860         if (n) {
861                 return (n->degree > 0);
862         } else
863                 return 0;
864 }
865
866 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
867 {
868         ir_node *nodes[]  = { a, b };
869         bitset_t *constr[] = { NULL, NULL };
870         int j;
871
872         constr[0] = bitset_alloca(co->cls->n_regs);
873         constr[1] = bitset_alloca(co->cls->n_regs);
874
875         for (j = 0; j < 2; ++j) {
876                 const arch_register_req_t *req = arch_get_irn_register_req(nodes[j]);
877                 if (arch_register_req_is(req, limited))
878                         rbitset_copy_to_bitset(req->limited, constr[j]);
879                 else
880                         bitset_set_all(constr[j]);
881
882         }
883
884         return !bitset_intersect(constr[0], constr[1]);
885 }
886
887 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
888 {
889         be_ifg_t *ifg       = co->cenv->ifg;
890         int      *color_map = ALLOCAN(int, co->cls->n_regs);
891         int      *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
892         ir_graph *irg       = co->irg;
893         be_irg_t *birg      = be_birg_from_irg(irg);
894
895         ir_node *irn;
896         nodes_iter_t it;
897         neighbours_iter_t nit;
898         int n, n_regs;
899         unsigned i;
900
901         n_regs = 0;
902         for (i = 0; i < co->cls->n_regs; ++i) {
903                 const arch_register_t *reg = &co->cls->regs[i];
904                 if (rbitset_is_set(birg->allocatable_regs, reg->global_index)) {
905                         color_map[i] = n_regs++;
906                 } else {
907                         color_map[i] = -1;
908                 }
909         }
910
911         /*
912          * n contains the first node number.
913          * the values below n are the pre-colored register nodes
914          */
915
916         n = n_regs;
917         be_ifg_foreach_node(ifg, &it, irn) {
918                 if (arch_irn_is_ignore(irn))
919                         continue;
920                 node_map[get_irn_idx(irn)] = n++;
921         }
922
923         fprintf(f, "%d %d\n", n, n_regs);
924
925         be_ifg_foreach_node(ifg, &it, irn) {
926                 if (!arch_irn_is_ignore(irn)) {
927                         int idx                        = node_map[get_irn_idx(irn)];
928                         affinity_node_t           *a   = get_affinity_info(co, irn);
929                         const arch_register_req_t *req = arch_get_irn_register_req(irn);
930                         ir_node                   *adj;
931
932                         if (arch_register_req_is(req, limited)) {
933                                 for (i = 0; i < co->cls->n_regs; ++i) {
934                                         if (!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
935                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
936                                 }
937                         }
938
939                         be_ifg_foreach_neighbour(ifg, &nit, irn, adj) {
940                                 if (!arch_irn_is_ignore(adj) &&
941                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
942                                         int adj_idx = node_map[get_irn_idx(adj)];
943                                         if (idx < adj_idx)
944                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
945                                 }
946                         }
947
948                         if (a) {
949                                 neighb_t *n;
950
951                                 co_gs_foreach_neighb(a, n) {
952                                         if (!arch_irn_is_ignore(n->irn)) {
953                                                 int n_idx = node_map[get_irn_idx(n->irn)];
954                                                 if (idx < n_idx)
955                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
956                                         }
957                                 }
958                         }
959                 }
960         }
961
962         xfree(node_map);
963 }
964
965 /*
966          ___ _____ ____   ____   ___ _____   ____                        _
967         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
968          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
969          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
970         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
971                                                                   |_|            |___/
972 */
973
974 static const char *get_dot_color_name(size_t col)
975 {
976         static const char *names[] = {
977                 "blue",
978                 "red",
979                 "green",
980                 "yellow",
981                 "cyan",
982                 "magenta",
983                 "orange",
984                 "chocolate",
985                 "beige",
986                 "navy",
987                 "darkgreen",
988                 "darkred",
989                 "lightPink",
990                 "chartreuse",
991                 "lightskyblue",
992                 "linen",
993                 "pink",
994                 "lightslateblue",
995                 "mintcream",
996                 "red",
997                 "darkolivegreen",
998                 "mediumblue",
999                 "mistyrose",
1000                 "salmon",
1001                 "darkseagreen",
1002                 "mediumslateblue"
1003                 "moccasin",
1004                 "tomato",
1005                 "forestgreen",
1006                 "darkturquoise",
1007                 "palevioletred"
1008         };
1009
1010         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
1011 }
1012
1013 typedef struct co_ifg_dump_t {
1014         const copy_opt_t *co;
1015         unsigned flags;
1016 } co_ifg_dump_t;
1017
1018 static void ifg_dump_graph_attr(FILE *f, void *self)
1019 {
1020         (void) self;
1021         fprintf(f, "overlap=scale");
1022 }
1023
1024 static int ifg_is_dump_node(void *self, ir_node *irn)
1025 {
1026         (void)self;
1027         return !arch_irn_is_ignore(irn);
1028 }
1029
1030 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1031 {
1032         co_ifg_dump_t             *env     = (co_ifg_dump_t*)self;
1033         const arch_register_t     *reg     = arch_get_irn_register(irn);
1034         const arch_register_req_t *req     = arch_get_irn_register_req(irn);
1035         int                        limited = arch_register_req_is(req, limited);
1036
1037         if (env->flags & CO_IFG_DUMP_LABELS) {
1038                 ir_fprintf(f, "label=\"%+F", irn);
1039
1040                 if ((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1041                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1042                         rbitset_copy_to_bitset(req->limited, bs);
1043                         ir_fprintf(f, "\\n%B", bs);
1044                 }
1045                 ir_fprintf(f, "\" ");
1046         } else {
1047                 fprintf(f, "label=\"\" shape=point " );
1048         }
1049
1050         if (env->flags & CO_IFG_DUMP_SHAPE)
1051                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1052
1053         if (env->flags & CO_IFG_DUMP_COLORS)
1054                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1055 }
1056
1057 static void ifg_dump_at_end(FILE *file, void *self)
1058 {
1059         co_ifg_dump_t *env = (co_ifg_dump_t*)self;
1060         affinity_node_t *a;
1061
1062         co_gs_foreach_aff_node(env->co, a) {
1063                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1064                 unsigned aidx = get_irn_idx(a->irn);
1065                 neighb_t *n;
1066
1067                 co_gs_foreach_neighb(a, n) {
1068                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1069                         unsigned nidx = get_irn_idx(n->irn);
1070
1071                         if (aidx < nidx) {
1072                                 const char *color = nr == ar ? "blue" : "red";
1073                                 fprintf(file, "\tn%u -- n%u [weight=0.01 ", aidx, nidx);
1074                                 if (env->flags & CO_IFG_DUMP_LABELS)
1075                                         fprintf(file, "label=\"%d\" ", n->costs);
1076                                 if (env->flags & CO_IFG_DUMP_COLORS)
1077                                         fprintf(file, "color=%s ", color);
1078                                 else
1079                                         fprintf(file, "style=dotted");
1080                                 fprintf(file, "];\n");
1081                         }
1082                 }
1083         }
1084 }
1085
1086
1087 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1088         ifg_is_dump_node,
1089         ifg_dump_graph_attr,
1090         ifg_dump_node_attr,
1091         NULL,
1092         NULL,
1093         ifg_dump_at_end
1094 };
1095
1096
1097
1098 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1099 {
1100         co_ifg_dump_t cod;
1101
1102         cod.co    = co;
1103         cod.flags = flags;
1104         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1105 }
1106
1107
1108 void co_solve_park_moon(copy_opt_t *opt)
1109 {
1110         (void) opt;
1111 }
1112
1113 /*
1114     __  __       _         ____       _
1115    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1116    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1117    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1118    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1119
1120 */
1121
1122 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1123 {
1124         FILE *result;
1125         char buf[1024];
1126         size_t i, n;
1127         char *tu_name;
1128         const char *cup_name = be_get_irg_main_env(env->irg)->cup_name;
1129
1130         n = strlen(cup_name);
1131         tu_name = XMALLOCN(char, n + 1);
1132         strcpy(tu_name, cup_name);
1133         for (i = 0; i < n; ++i)
1134                 if (tu_name[i] == '.')
1135                         tu_name[i] = '_';
1136
1137
1138         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1139         xfree(tu_name);
1140         result = fopen(buf, "wt");
1141         if (result == NULL) {
1142                 panic("Couldn't open '%s' for writing.", buf);
1143         }
1144
1145         return result;
1146 }
1147
1148 void co_driver(be_chordal_env_t *cenv)
1149 {
1150         ir_timer_t          *timer = ir_timer_new();
1151         co_complete_stats_t before, after;
1152         copy_opt_t          *co;
1153         int                 was_optimal = 0;
1154
1155         assert(selected_copyopt);
1156
1157         /* skip copymin if algo is 'none' */
1158         if (selected_copyopt->copyopt == void_algo)
1159                 return;
1160
1161         be_liveness_assure_chk(be_get_irg_liveness(cenv->irg));
1162
1163         co = new_copy_opt(cenv, cost_func);
1164         co_build_ou_structure(co);
1165         co_build_graph_structure(co);
1166
1167         co_complete_stats(co, &before);
1168
1169         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1170         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1171         be_stat_ev_ull("co_max_costs",    before.max_costs);
1172         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1173         be_stat_ev_ull("co_aff_int",      before.aff_int);
1174
1175         be_stat_ev_ull("co_init_costs",   before.costs);
1176         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1177
1178         if (dump_flags & DUMP_BEFORE) {
1179                 FILE *f = my_open(cenv, "", "-before.dot");
1180                 co_dump_ifg_dot(co, f, style_flags);
1181                 fclose(f);
1182         }
1183
1184         /* if the algo can improve results, provide an initial solution with heur1 */
1185         if (improve && selected_copyopt->can_improve_existing) {
1186                 co_complete_stats_t stats;
1187
1188                 /* produce a heuristic solution */
1189                 co_solve_heuristic(co);
1190
1191                 /* do the stats and provide the current costs */
1192                 co_complete_stats(co, &stats);
1193                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1194         }
1195
1196         /* perform actual copy minimization */
1197         ir_timer_reset_and_start(timer);
1198         was_optimal = selected_copyopt->copyopt(co);
1199         ir_timer_stop(timer);
1200
1201         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1202         be_stat_ev_ull("co_optimal", was_optimal);
1203         ir_timer_free(timer);
1204
1205         if (dump_flags & DUMP_AFTER) {
1206                 FILE *f = my_open(cenv, "", "-after.dot");
1207                 co_dump_ifg_dot(co, f, style_flags);
1208                 fclose(f);
1209         }
1210
1211         co_complete_stats(co, &after);
1212
1213         if (do_stats) {
1214                 unsigned long long optimizable_costs = after.max_costs - after.inevit_costs;
1215                 unsigned long long evitable          = after.costs     - after.inevit_costs;
1216
1217                 ir_printf("%30F ", cenv->irg);
1218                 printf("%10s %10llu%10llu%10llu", cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1219
1220                 if (optimizable_costs > 0)
1221                         printf("%10llu %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1222                 else
1223                         printf("%10llu %5s\n", after.costs, "-");
1224         }
1225
1226         /* Dump the interference graph in Appel's format. */
1227         if (dump_flags & DUMP_APPEL) {
1228                 FILE *f = my_open(cenv, "", ".apl");
1229                 fprintf(f, "# %llu %llu\n", after.costs, after.unsatisfied_edges);
1230                 co_dump_appel_graph(co, f);
1231                 fclose(f);
1232         }
1233
1234         be_stat_ev_ull("co_after_costs", after.costs);
1235         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1236
1237         co_free_graph_structure(co);
1238         co_free_ou_structure(co);
1239         free_copy_opt(co);
1240 }