Remove the unused parameter const arch_env_t *env from arch_irn_get_flags(), arch_irn...
[libfirm] / ir / be / becopyopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief       Copy minimization driver.
23  * @author      Daniel Grund
24  * @date        12.04.2005
25  * @version     $Id$
26  *
27  * Main file for the optimization reducing the copies needed for:
28  * - Phi coalescing
29  * - Register-constrained nodes
30  * - Two-address code instructions
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include "execfreq.h"
37 #include "xmalloc.h"
38 #include "debug.h"
39 #include "pmap.h"
40 #include "raw_bitset.h"
41 #include "irnode.h"
42 #include "irgraph.h"
43 #include "irgwalk.h"
44 #include "irprog.h"
45 #include "irloop_t.h"
46 #include "iredges_t.h"
47 #include "phiclass.h"
48 #include "irbitset.h"
49 #include "irphase_t.h"
50 #include "irprintf_t.h"
51
52 #include "bemodule.h"
53 #include "bearch_t.h"
54 #include "benode_t.h"
55 #include "beutil.h"
56 #include "beifg_t.h"
57 #include "beintlive_t.h"
58 #include "becopyopt_t.h"
59 #include "becopystat.h"
60 #include "belive_t.h"
61 #include "beinsn_t.h"
62 #include "besched_t.h"
63 #include "bestatevent.h"
64 #include "beirg_t.h"
65 #include "error.h"
66
67 #include "lc_opts.h"
68 #include "lc_opts_enum.h"
69
70 #define DUMP_BEFORE 1
71 #define DUMP_AFTER  2
72 #define DUMP_APPEL  4
73 #define DUMP_ALL    2 * DUMP_APPEL - 1
74
75 #define COST_FUNC_FREQ     1
76 #define COST_FUNC_LOOP     2
77 #define COST_FUNC_ALL_ONE  3
78
79 static unsigned   dump_flags  = 0;
80 static unsigned   style_flags = 0;
81 static unsigned   do_stats    = 0;
82 static cost_fct_t cost_func   = co_get_costs_exec_freq;
83 static unsigned   algo        = CO_ALGO_HEUR4;
84 static int        improve     = 1;
85
86 static const lc_opt_enum_mask_items_t dump_items[] = {
87         { "before",  DUMP_BEFORE },
88         { "after",   DUMP_AFTER  },
89         { "appel",   DUMP_APPEL  },
90         { "all",     DUMP_ALL    },
91         { NULL,      0 }
92 };
93
94 static const lc_opt_enum_mask_items_t style_items[] = {
95         { "color",   CO_IFG_DUMP_COLORS },
96         { "labels",  CO_IFG_DUMP_LABELS },
97         { "constr",  CO_IFG_DUMP_CONSTR },
98         { "shape",   CO_IFG_DUMP_SHAPE  },
99         { "full",    2 * CO_IFG_DUMP_SHAPE - 1 },
100         { NULL,      0 }
101 };
102
103 static const lc_opt_enum_mask_items_t algo_items[] = {
104         { "none",   CO_ALGO_NONE  },
105         { "heur",   CO_ALGO_HEUR  },
106         { "heur2",  CO_ALGO_HEUR2 },
107         { "heur3",  CO_ALGO_HEUR3 },
108         { "heur4",  CO_ALGO_HEUR4 },
109         { "ilp",    CO_ALGO_ILP   },
110         { NULL,     0 }
111 };
112
113 typedef int (*opt_funcptr)(void);
114
115 static const lc_opt_enum_func_ptr_items_t cost_func_items[] = {
116         { "freq",   (opt_funcptr) co_get_costs_exec_freq },
117         { "loop",   (opt_funcptr) co_get_costs_loop_depth },
118         { "one",    (opt_funcptr) co_get_costs_all_one },
119         { NULL,     NULL }
120 };
121
122 static lc_opt_enum_mask_var_t dump_var = {
123         &dump_flags, dump_items
124 };
125
126 static lc_opt_enum_mask_var_t style_var = {
127         &style_flags, style_items
128 };
129
130 static lc_opt_enum_mask_var_t algo_var = {
131         &algo, algo_items
132 };
133
134 static lc_opt_enum_func_ptr_var_t cost_func_var = {
135         (opt_funcptr*) &cost_func, cost_func_items
136 };
137
138 static const lc_opt_table_entry_t options[] = {
139         LC_OPT_ENT_ENUM_INT      ("algo",    "select copy optimization algo",                           &algo_var),
140         LC_OPT_ENT_ENUM_FUNC_PTR ("cost",    "select a cost function",                                  &cost_func_var),
141         LC_OPT_ENT_ENUM_MASK     ("dump",    "dump ifg before or after copy optimization",              &dump_var),
142         LC_OPT_ENT_ENUM_MASK     ("style",   "dump style for ifg dumping",                              &style_var),
143         LC_OPT_ENT_BOOL          ("stats",   "dump statistics after each optimization",                 &do_stats),
144         LC_OPT_ENT_BOOL          ("improve", "run heur3 before if algo can exploit start solutions",    &improve),
145         LC_OPT_LAST
146 };
147
148 /* Insert additional options registration functions here. */
149 extern void be_co_ilp_register_options(lc_opt_entry_t *grp);
150 extern void be_co2_register_options(lc_opt_entry_t *grp);
151 extern void be_co3_register_options(lc_opt_entry_t *grp);
152
153 void be_init_copycoal(void)
154 {
155         lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
156         lc_opt_entry_t *ra_grp = lc_opt_get_grp(be_grp, "ra");
157         lc_opt_entry_t *chordal_grp = lc_opt_get_grp(ra_grp, "chordal");
158         lc_opt_entry_t *co_grp = lc_opt_get_grp(chordal_grp, "co");
159
160         lc_opt_add_table(co_grp, options);
161 }
162
163 BE_REGISTER_MODULE_CONSTRUCTOR(be_init_copycoal);
164
165 #undef QUICK_AND_DIRTY_HACK
166
167 static int nodes_interfere(const be_chordal_env_t *env, const ir_node *a, const ir_node *b)
168 {
169         if (env->ifg)
170                 return be_ifg_connected(env->ifg, a, b);
171         else
172                 return values_interfere(env->birg, a, b);
173 }
174
175
176 /******************************************************************************
177     _____                           _
178    / ____|                         | |
179   | |  __  ___ _ __   ___ _ __ __ _| |
180   | | |_ |/ _ \ '_ \ / _ \ '__/ _` | |
181   | |__| |  __/ | | |  __/ | | (_| | |
182    \_____|\___|_| |_|\___|_|  \__,_|_|
183
184  ******************************************************************************/
185
186 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
187
188
189 copy_opt_t *new_copy_opt(be_chordal_env_t *chordal_env, cost_fct_t get_costs)
190 {
191         const char *s1, *s2, *s3;
192         int len;
193         copy_opt_t *co;
194
195         FIRM_DBG_REGISTER(dbg, "ir.be.copyopt");
196
197         co = XMALLOCZ(copy_opt_t);
198         co->cenv      = chordal_env;
199         co->aenv      = chordal_env->birg->main_env->arch_env;
200         co->irg       = chordal_env->irg;
201         co->cls       = chordal_env->cls;
202         co->get_costs = get_costs;
203
204         s1 = get_irp_prog_name();
205         s2 = get_entity_name(get_irg_entity(co->irg));
206         s3 = chordal_env->cls->name;
207         len = strlen(s1) + strlen(s2) + strlen(s3) + 5;
208         co->name = XMALLOCN(char, len);
209         snprintf(co->name, len, "%s__%s__%s", s1, s2, s3);
210
211         return co;
212 }
213
214 void free_copy_opt(copy_opt_t *co) {
215         xfree(co->name);
216         free(co);
217 }
218
219 int co_is_optimizable_root(const copy_opt_t *co, ir_node *irn) {
220         const arch_register_req_t *req;
221         const arch_register_t *reg;
222         (void)co; // TODO remove parameter
223
224         if (arch_irn_is(irn, ignore))
225                 return 0;
226
227         reg = arch_get_irn_register(irn);
228         if (arch_register_type_is(reg, ignore))
229                 return 0;
230
231         req = arch_get_register_req(irn, -1);
232         if (is_Reg_Phi(irn) || is_Perm_Proj(irn) || is_2addr_code(req))
233                 return 1;
234
235         return 0;
236 }
237
238 int co_get_costs_loop_depth(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
239         int cost = 0;
240         ir_loop *loop;
241         ir_node *root_block = get_nodes_block(root);
242         (void) co;
243         (void) arg;
244
245         if (is_Phi(root)) {
246                 /* for phis the copies are placed in the corresponding pred-block */
247                 loop = get_irn_loop(get_Block_cfgpred_block(root_block, pos));
248         } else {
249                 /* a perm places the copy in the same block as it resides */
250                 loop = get_irn_loop(root_block);
251         }
252         if (loop) {
253                 int d = get_loop_depth(loop);
254                 cost = d*d;
255         }
256         return 1+cost;
257 }
258
259 int co_get_costs_exec_freq(const copy_opt_t *co, ir_node *root, ir_node* arg, int pos) {
260         int res;
261         ir_node *root_bl = get_nodes_block(root);
262         ir_node *copy_bl = is_Phi(root) ? get_Block_cfgpred_block(root_bl, pos) : root_bl;
263         (void) arg;
264         res = get_block_execfreq_ulong(co->cenv->birg->exec_freq, copy_bl);
265
266         /* don't allow values smaller than one. */
267         return res < 1 ? 1 : res;
268 }
269
270
271 int co_get_costs_all_one(const copy_opt_t *co, ir_node *root, ir_node *arg, int pos) {
272         (void) co;
273         (void) root;
274         (void) arg;
275         (void) pos;
276         return 1;
277 }
278
279 /******************************************************************************
280    ____        _   _    _       _ _          _____ _
281   / __ \      | | | |  | |     (_) |        / ____| |
282  | |  | |_ __ | |_| |  | |_ __  _| |_ ___  | (___ | |_ ___  _ __ __ _  __ _  ___
283  | |  | | '_ \| __| |  | | '_ \| | __/ __|  \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
284  | |__| | |_) | |_| |__| | | | | | |_\__ \  ____) | || (_) | | | (_| | (_| |  __/
285   \____/| .__/ \__|\____/|_| |_|_|\__|___/ |_____/ \__\___/|_|  \__,_|\__, |\___|
286         | |                                                            __/ |
287         |_|                                                           |___/
288  ******************************************************************************/
289
290 /**
291  * Determines a maximum weighted independent set with respect to
292  * the interference and conflict edges of all nodes in a qnode.
293  */
294 static int ou_max_ind_set_costs(unit_t *ou) {
295         be_chordal_env_t *chordal_env = ou->co->cenv;
296         ir_node **safe, **unsafe;
297         int i, o, safe_count, safe_costs, unsafe_count, *unsafe_costs;
298         bitset_t *curr;
299         bitset_pos_t pos;
300         int max, curr_weight, best_weight = 0;
301
302         /* assign the nodes into two groups.
303          * safe: node has no interference, hence it is in every max stable set.
304          * unsafe: node has an interference
305          */
306         safe = alloca((ou->node_count-1) * sizeof(*safe));
307         safe_costs = 0;
308         safe_count = 0;
309         unsafe = alloca((ou->node_count-1) * sizeof(*unsafe));
310         unsafe_costs = alloca((ou->node_count-1) * sizeof(*unsafe_costs));
311         unsafe_count = 0;
312         for(i=1; i<ou->node_count; ++i) {
313                 int is_safe = 1;
314                 for(o=1; o<ou->node_count; ++o) {
315                         if (i==o)
316                                 continue;
317                         if (nodes_interfere(chordal_env, ou->nodes[i], ou->nodes[o])) {
318                                 unsafe_costs[unsafe_count] = ou->costs[i];
319                                 unsafe[unsafe_count] = ou->nodes[i];
320                                 ++unsafe_count;
321                                 is_safe = 0;
322                                 break;
323                         }
324                 }
325                 if (is_safe) {
326                         safe_costs += ou->costs[i];
327                         safe[safe_count++] = ou->nodes[i];
328                 }
329         }
330
331
332         /* now compute the best set out of the unsafe nodes*/
333         if (unsafe_count > MIS_HEUR_TRIGGER) {
334                 bitset_t *best = bitset_alloca(unsafe_count);
335                 /* Heuristik: Greedy trial and error form index 0 to unsafe_count-1 */
336                 for (i=0; i<unsafe_count; ++i) {
337                         bitset_set(best, i);
338                         /* check if it is a stable set */
339                         for (o=bitset_next_set(best, 0); o!=-1 && o<i; o=bitset_next_set(best, o+1))
340                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o])) {
341                                         bitset_clear(best, i); /* clear the bit and try next one */
342                                         break;
343                                 }
344                 }
345                 /* compute the weight */
346                 bitset_foreach(best, pos)
347                         best_weight += unsafe_costs[pos];
348         } else {
349                 /* Exact Algorithm: Brute force */
350                 curr = bitset_alloca(unsafe_count);
351                 bitset_set_all(curr);
352                 while ((max = bitset_popcnt(curr)) != 0) {
353                         /* check if curr is a stable set */
354                         for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
355                                 for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
356                                                 if (nodes_interfere(chordal_env, unsafe[i], unsafe[o]))
357                                                         goto no_stable_set;
358
359                         /* if we arrive here, we have a stable set */
360                         /* compute the weigth of the stable set*/
361                         curr_weight = 0;
362                         bitset_foreach(curr, pos)
363                                 curr_weight += unsafe_costs[pos];
364
365                         /* any better ? */
366                         if (curr_weight > best_weight) {
367                                 best_weight = curr_weight;
368                         }
369
370         no_stable_set:
371                         bitset_minus1(curr);
372                 }
373         }
374
375         return safe_costs+best_weight;
376 }
377
378 static void co_collect_units(ir_node *irn, void *env) {
379         copy_opt_t *co = env;
380         unit_t *unit;
381
382         if (!is_curr_reg_class(co, irn))
383                 return;
384         if (!co_is_optimizable_root(co, irn))
385                 return;
386
387         /* Init a new unit */
388         unit = XMALLOCZ(unit_t);
389         unit->co = co;
390         unit->node_count = 1;
391         INIT_LIST_HEAD(&unit->queue);
392
393         /* Phi with some/all of its arguments */
394         if (is_Reg_Phi(irn)) {
395                 int i, arity;
396
397                 /* init */
398                 arity = get_irn_arity(irn);
399                 unit->nodes = XMALLOCN(ir_node*, arity + 1);
400                 unit->costs = XMALLOCN(int,      arity + 1);
401                 unit->nodes[0] = irn;
402
403                 /* fill */
404                 for (i=0; i<arity; ++i) {
405                         int o, arg_pos;
406                         ir_node *arg = get_irn_n(irn, i);
407
408                         assert(is_curr_reg_class(co, arg) && "Argument not in same register class.");
409                         if (arg == irn)
410                                 continue;
411                         if (nodes_interfere(co->cenv, irn, arg)) {
412                                 unit->inevitable_costs += co->get_costs(co, irn, arg, i);
413                                 continue;
414                         }
415
416                         /* Else insert the argument of the phi to the members of this ou */
417                         DBG((dbg, LEVEL_1, "\t   Member: %+F\n", arg));
418
419                         if (!arch_irn_is(arg, ignore)) {
420                                 /* Check if arg has occurred at a prior position in the arg/list */
421                                 arg_pos = 0;
422                                 for (o=1; o<unit->node_count; ++o) {
423                                         if (unit->nodes[o] == arg) {
424                                                 arg_pos = o;
425                                                 break;
426                                         }
427                                 }
428
429                                 if (!arg_pos) { /* a new argument */
430                                         /* insert node, set costs */
431                                         unit->nodes[unit->node_count] = arg;
432                                         unit->costs[unit->node_count] = co->get_costs(co, irn, arg, i);
433                                         unit->node_count++;
434                                 } else { /* arg has occurred before in same phi */
435                                         /* increase costs for existing arg */
436                                         unit->costs[arg_pos] += co->get_costs(co, irn, arg, i);
437                                 }
438                         }
439                 }
440                 unit->nodes = XREALLOC(unit->nodes, ir_node*, unit->node_count);
441                 unit->costs = XREALLOC(unit->costs, int,      unit->node_count);
442         } else if (is_Perm_Proj(irn)) {
443                 /* Proj of a perm with corresponding arg */
444                 assert(!nodes_interfere(co->cenv, irn, get_Perm_src(irn)));
445                 unit->nodes = XMALLOCN(ir_node*, 2);
446                 unit->costs = XMALLOCN(int,      2);
447                 unit->node_count = 2;
448                 unit->nodes[0] = irn;
449                 unit->nodes[1] = get_Perm_src(irn);
450                 unit->costs[1] = co->get_costs(co, irn, unit->nodes[1], -1);
451         } else {
452                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
453
454                 /* Src == Tgt of a 2-addr-code instruction */
455                 if (is_2addr_code(req)) {
456                         const unsigned other = req->other_same;
457                         int            count = 0;
458                         int            i;
459
460                         for (i = 0; (1U << i) <= other; ++i) {
461                                 if (other & (1U << i)) {
462                                         ir_node *o  = get_irn_n(skip_Proj(irn), i);
463                                         if (!arch_irn_is(o, ignore) &&
464                                                         !nodes_interfere(co->cenv, irn, o)) {
465                                                 ++count;
466                                         }
467                                 }
468                         }
469
470                         if (count != 0) {
471                                 int k = 0;
472                                 ++count;
473                                 unit->nodes = XMALLOCN(ir_node*, count);
474                                 unit->costs = XMALLOCN(int,      count);
475                                 unit->node_count = count;
476                                 unit->nodes[k++] = irn;
477
478                                 for (i = 0; 1U << i <= other; ++i) {
479                                         if (other & (1U << i)) {
480                                                 ir_node *o  = get_irn_n(skip_Proj(irn), i);
481                                                 if (!arch_irn_is(o, ignore) &&
482                                                                 !nodes_interfere(co->cenv, irn, o)) {
483                                                         unit->nodes[k] = o;
484                                                         unit->costs[k] = co->get_costs(co, irn, o, -1);
485                                                         ++k;
486                                                 }
487                                         }
488                                 }
489                         }
490                 } else {
491                         assert(0 && "This is not an optimizable node!");
492                 }
493         }
494
495         /* Insert the new unit at a position according to its costs */
496         if (unit->node_count > 1) {
497                 int i;
498                 struct list_head *tmp;
499
500                 /* Determine the maximum costs this unit can cause: all_nodes_cost */
501                 for(i=1; i<unit->node_count; ++i) {
502                         unit->sort_key = MAX(unit->sort_key, unit->costs[i]);
503                         unit->all_nodes_costs += unit->costs[i];
504                 }
505
506                 /* Determine the minimal costs this unit will cause: min_nodes_costs */
507                 unit->min_nodes_costs += unit->all_nodes_costs - ou_max_ind_set_costs(unit);
508                 /* Insert the new ou according to its sort_key */
509                 tmp = &co->units;
510                 while (tmp->next != &co->units && list_entry_units(tmp->next)->sort_key > unit->sort_key)
511                         tmp = tmp->next;
512                 list_add(&unit->units, tmp);
513         } else {
514                 free(unit);
515         }
516 }
517
518 #ifdef QUICK_AND_DIRTY_HACK
519
520 static int compare_ous(const void *k1, const void *k2) {
521         const unit_t *u1 = *((const unit_t **) k1);
522         const unit_t *u2 = *((const unit_t **) k2);
523         int i, o, u1_has_constr, u2_has_constr;
524         arch_register_req_t req;
525
526         /* Units with constraints come first */
527         u1_has_constr = 0;
528         for (i=0; i<u1->node_count; ++i) {
529                 arch_get_register_req(&req, u1->nodes[i], -1);
530                 if (arch_register_req_is(&req, limited)) {
531                         u1_has_constr = 1;
532                         break;
533                 }
534         }
535
536         u2_has_constr = 0;
537         for (i=0; i<u2->node_count; ++i) {
538                 arch_get_register_req(&req, u2->nodes[i], -1);
539                 if (arch_register_req_is(&req, limited)) {
540                         u2_has_constr = 1;
541                         break;
542                 }
543         }
544
545         if (u1_has_constr != u2_has_constr)
546                 return u2_has_constr - u1_has_constr;
547
548         /* Now check, whether the two units are connected */
549 #if 0
550         for (i=0; i<u1->node_count; ++i)
551                 for (o=0; o<u2->node_count; ++o)
552                         if (u1->nodes[i] == u2->nodes[o])
553                                 return 0;
554 #endif
555
556         /* After all, the sort key decides. Greater keys come first. */
557         return u2->sort_key - u1->sort_key;
558
559 }
560
561 /**
562  * Sort the ou's according to constraints and their sort_key
563  */
564 static void co_sort_units(copy_opt_t *co) {
565         int i, count = 0, costs;
566         unit_t *ou, **ous;
567
568         /* get the number of ous, remove them form the list and fill the array */
569         list_for_each_entry(unit_t, ou, &co->units, units)
570                 count++;
571         ous = alloca(count * sizeof(*ous));
572
573         costs = co_get_max_copy_costs(co);
574
575         i = 0;
576         list_for_each_entry(unit_t, ou, &co->units, units)
577                 ous[i++] = ou;
578
579         INIT_LIST_HEAD(&co->units);
580
581         assert(count == i && list_empty(&co->units));
582
583         for (i=0; i<count; ++i)
584                 ir_printf("%+F\n", ous[i]->nodes[0]);
585
586         qsort(ous, count, sizeof(*ous), compare_ous);
587
588         ir_printf("\n\n");
589         for (i=0; i<count; ++i)
590                 ir_printf("%+F\n", ous[i]->nodes[0]);
591
592         /* reinsert into list in correct order */
593         for (i=0; i<count; ++i)
594                 list_add_tail(&ous[i]->units, &co->units);
595
596         assert(costs == co_get_max_copy_costs(co));
597 }
598 #endif
599
600 void co_build_ou_structure(copy_opt_t *co) {
601         DBG((dbg, LEVEL_1, "\tCollecting optimization units\n"));
602         INIT_LIST_HEAD(&co->units);
603         irg_walk_graph(co->irg, co_collect_units, NULL, co);
604 #ifdef QUICK_AND_DIRTY_HACK
605         co_sort_units(co);
606 #endif
607 }
608
609 void co_free_ou_structure(copy_opt_t *co) {
610         unit_t *curr, *tmp;
611         ASSERT_OU_AVAIL(co);
612         list_for_each_entry_safe(unit_t, curr, tmp, &co->units, units) {
613                 xfree(curr->nodes);
614                 xfree(curr->costs);
615                 xfree(curr);
616         }
617         co->units.next = NULL;
618 }
619
620 /* co_solve_heuristic() is implemented in becopyheur.c */
621
622 int co_get_max_copy_costs(const copy_opt_t *co) {
623         int i, res = 0;
624         unit_t *curr;
625
626         ASSERT_OU_AVAIL(co);
627
628         list_for_each_entry(unit_t, curr, &co->units, units) {
629                 res += curr->inevitable_costs;
630                 for (i=1; i<curr->node_count; ++i)
631                         res += curr->costs[i];
632         }
633         return res;
634 }
635
636 int co_get_inevit_copy_costs(const copy_opt_t *co) {
637         int res = 0;
638         unit_t *curr;
639
640         ASSERT_OU_AVAIL(co);
641
642         list_for_each_entry(unit_t, curr, &co->units, units)
643                 res += curr->inevitable_costs;
644         return res;
645 }
646
647 int co_get_copy_costs(const copy_opt_t *co) {
648         int i, res = 0;
649         unit_t *curr;
650
651         ASSERT_OU_AVAIL(co);
652
653         list_for_each_entry(unit_t, curr, &co->units, units) {
654                 int root_col = get_irn_col(curr->nodes[0]);
655                 DBG((dbg, LEVEL_1, "  %3d costs for root %+F color %d\n", curr->inevitable_costs, curr->nodes[0], root_col));
656                 res += curr->inevitable_costs;
657                 for (i=1; i<curr->node_count; ++i) {
658                         int arg_col = get_irn_col(curr->nodes[i]);
659                         if (root_col != arg_col) {
660                                 DBG((dbg, LEVEL_1, "  %3d for arg %+F color %d\n", curr->costs[i], curr->nodes[i], arg_col));
661                                 res += curr->costs[i];
662                         }
663                 }
664         }
665         return res;
666 }
667
668 int co_get_lower_bound(const copy_opt_t *co) {
669         int res = 0;
670         unit_t *curr;
671
672         ASSERT_OU_AVAIL(co);
673
674         list_for_each_entry(unit_t, curr, &co->units, units)
675                 res += curr->inevitable_costs + curr->min_nodes_costs;
676         return res;
677 }
678
679 void co_complete_stats(const copy_opt_t *co, co_complete_stats_t *stat)
680 {
681         bitset_t *seen = bitset_irg_malloc(co->irg);
682         affinity_node_t *an;
683
684         memset(stat, 0, sizeof(stat[0]));
685
686         /* count affinity edges. */
687         co_gs_foreach_aff_node(co, an) {
688                 neighb_t *neigh;
689                 stat->aff_nodes += 1;
690                 bitset_add_irn(seen, an->irn);
691                 co_gs_foreach_neighb(an, neigh) {
692                         if(!bitset_contains_irn(seen, neigh->irn)) {
693                                 stat->aff_edges += 1;
694                                 stat->max_costs += neigh->costs;
695
696                                 if (get_irn_col(an->irn) != get_irn_col(neigh->irn)) {
697                                         stat->costs += neigh->costs;
698                                         stat->unsatisfied_edges += 1;
699                                 }
700
701                                 if(nodes_interfere(co->cenv, an->irn, neigh->irn)) {
702                                         stat->aff_int += 1;
703                                         stat->inevit_costs += neigh->costs;
704                                 }
705
706                         }
707                 }
708         }
709
710         bitset_free(seen);
711 }
712
713 /******************************************************************************
714    _____                 _        _____ _
715   / ____|               | |      / ____| |
716  | |  __ _ __ __ _ _ __ | |__   | (___ | |_ ___  _ __ __ _  __ _  ___
717  | | |_ | '__/ _` | '_ \| '_ \   \___ \| __/ _ \| '__/ _` |/ _` |/ _ \
718  | |__| | | | (_| | |_) | | | |  ____) | || (_) | | | (_| | (_| |  __/
719   \_____|_|  \__,_| .__/|_| |_| |_____/ \__\___/|_|  \__,_|\__, |\___|
720                   | |                                       __/ |
721                   |_|                                      |___/
722  ******************************************************************************/
723
724 static int compare_affinity_node_t(const void *k1, const void *k2, size_t size) {
725         const affinity_node_t *n1 = k1;
726         const affinity_node_t *n2 = k2;
727         (void) size;
728
729         return (n1->irn != n2->irn);
730 }
731
732 static void add_edge(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
733         affinity_node_t new_node, *node;
734         neighb_t        *nbr;
735         int             allocnew = 1;
736
737         new_node.irn        = n1;
738         new_node.degree     = 0;
739         new_node.neighbours = NULL;
740         node = set_insert(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
741
742         for (nbr = node->neighbours; nbr; nbr = nbr->next)
743                 if (nbr->irn == n2) {
744                         allocnew = 0;
745                         break;
746                 }
747
748         /* if we did not find n2 in n1's neighbourhood insert it */
749         if (allocnew) {
750                 nbr        = obstack_alloc(&co->obst, sizeof(*nbr));
751                 nbr->irn   = n2;
752                 nbr->costs = 0;
753                 nbr->next  = node->neighbours;
754
755                 node->neighbours = nbr;
756                 node->degree++;
757         }
758
759         /* now nbr points to n1's neighbour-entry of n2 */
760         nbr->costs += costs;
761 }
762
763 static INLINE void add_edges(copy_opt_t *co, ir_node *n1, ir_node *n2, int costs) {
764         if (! be_ifg_connected(co->cenv->ifg, n1, n2)) {
765                 add_edge(co, n1, n2, costs);
766                 add_edge(co, n2, n1, costs);
767         }
768 }
769
770 static void build_graph_walker(ir_node *irn, void *env) {
771         copy_opt_t *co = env;
772         int pos, max;
773         const arch_register_t *reg;
774
775         if (!is_curr_reg_class(co, irn) || arch_irn_is(irn, ignore))
776                 return;
777
778         reg = arch_get_irn_register(irn);
779         if (arch_register_type_is(reg, ignore))
780                 return;
781
782         if (is_Reg_Phi(irn)) { /* Phis */
783                 for (pos=0, max=get_irn_arity(irn); pos<max; ++pos) {
784                         ir_node *arg = get_irn_n(irn, pos);
785                         add_edges(co, irn, arg, co->get_costs(co, irn, arg, pos));
786                 }
787         } else if (is_Perm_Proj(irn)) { /* Perms */
788                 ir_node *arg = get_Perm_src(irn);
789                 add_edges(co, irn, arg, co->get_costs(co, irn, arg, 0));
790         }
791         else { /* 2-address code */
792                 const arch_register_req_t *req = arch_get_register_req(irn, -1);
793                 if (is_2addr_code(req)) {
794                         const unsigned other = req->other_same;
795                         int i;
796
797                         for (i = 0; 1U << i <= other; ++i) {
798                                 if (other & (1U << i)) {
799                                         ir_node *other = get_irn_n(skip_Proj(irn), i);
800                                         if (!arch_irn_is(other, ignore))
801                                                 add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
802                                 }
803                         }
804                 }
805         }
806 }
807
808 void co_build_graph_structure(copy_opt_t *co) {
809         obstack_init(&co->obst);
810         co->nodes = new_set(compare_affinity_node_t, 32);
811
812         irg_walk_graph(co->irg, build_graph_walker, NULL, co);
813 }
814
815 void co_free_graph_structure(copy_opt_t *co) {
816         ASSERT_GS_AVAIL(co);
817
818         del_set(co->nodes);
819         obstack_free(&co->obst, NULL);
820         co->nodes = NULL;
821 }
822
823 /* co_solve_ilp1() co_solve_ilp2() are implemented in becopyilpX.c */
824
825 int co_gs_is_optimizable(copy_opt_t *co, ir_node *irn) {
826         affinity_node_t new_node, *n;
827
828         ASSERT_GS_AVAIL(co);
829
830         new_node.irn = irn;
831         n = set_find(co->nodes, &new_node, sizeof(new_node), hash_irn(new_node.irn));
832         if (n) {
833                 return (n->degree > 0);
834         } else
835                 return 0;
836 }
837
838 static int co_dump_appel_disjoint_constraints(const copy_opt_t *co, ir_node *a, ir_node *b)
839 {
840         ir_node *nodes[]  = { a, b };
841         bitset_t *constr[] = { NULL, NULL };
842         const arch_register_req_t *req;
843         int j;
844
845         constr[0] = bitset_alloca(co->cls->n_regs);
846         constr[1] = bitset_alloca(co->cls->n_regs);
847
848         for (j = 0; j < 2; ++j) {
849                 req = arch_get_register_req(nodes[j], BE_OUT_POS(0));
850                 if(arch_register_req_is(req, limited))
851                         rbitset_copy_to_bitset(req->limited, constr[j]);
852                 else
853                         bitset_set_all(constr[j]);
854
855         }
856
857         return !bitset_intersect(constr[0], constr[1]);
858 }
859
860 void co_dump_appel_graph(const copy_opt_t *co, FILE *f)
861 {
862         be_ifg_t *ifg  = co->cenv->ifg;
863         int *color_map = alloca(co->cls->n_regs * sizeof(color_map[0]));
864         int *node_map  = XMALLOCN(int, get_irg_last_idx(co->irg) + 1);
865
866         ir_node *irn;
867         void *it, *nit;
868         int n, n_regs;
869         unsigned i;
870
871         n_regs = 0;
872         for(i = 0; i < co->cls->n_regs; ++i) {
873                 const arch_register_t *reg = &co->cls->regs[i];
874                 color_map[i] = arch_register_type_is(reg, ignore) ? -1 : n_regs++;
875         }
876
877         /*
878          * n contains the first node number.
879          * the values below n are the pre-colored register nodes
880          */
881
882         it  = be_ifg_nodes_iter_alloca(ifg);
883         nit = be_ifg_neighbours_iter_alloca(ifg);
884
885         n = n_regs;
886         be_ifg_foreach_node(ifg, it, irn) {
887                 if (!arch_irn_is(irn, ignore))
888                         node_map[get_irn_idx(irn)] = n++;
889         }
890
891         fprintf(f, "%d %d\n", n, n_regs);
892
893         be_ifg_foreach_node(ifg, it, irn) {
894                 if (!arch_irn_is(irn, ignore)) {
895                         int idx            = node_map[get_irn_idx(irn)];
896                         affinity_node_t *a = get_affinity_info(co, irn);
897
898                         const arch_register_req_t *req;
899                         ir_node *adj;
900
901                         req = arch_get_register_req(irn, BE_OUT_POS(0));
902                         if(arch_register_req_is(req, limited)) {
903                                 for(i = 0; i < co->cls->n_regs; ++i) {
904                                         if(!rbitset_is_set(req->limited, i) && color_map[i] >= 0)
905                                                 fprintf(f, "%d %d -1\n", color_map[i], idx);
906                                 }
907                         }
908
909                         be_ifg_foreach_neighbour(ifg, nit, irn, adj) {
910                                 if (!arch_irn_is(adj, ignore) &&
911                                                 !co_dump_appel_disjoint_constraints(co, irn, adj)) {
912                                         int adj_idx = node_map[get_irn_idx(adj)];
913                                         if(idx < adj_idx)
914                                                 fprintf(f, "%d %d -1\n", idx, adj_idx);
915                                 }
916                         }
917
918                         if(a) {
919                                 neighb_t *n;
920
921                                 co_gs_foreach_neighb(a, n) {
922                                         if (!arch_irn_is(n->irn, ignore)) {
923                                                 int n_idx = node_map[get_irn_idx(n->irn)];
924                                                 if(idx < n_idx)
925                                                         fprintf(f, "%d %d %d\n", idx, n_idx, (int) n->costs);
926                                         }
927                                 }
928                         }
929                 }
930         }
931
932         xfree(node_map);
933 }
934
935 /*
936          ___ _____ ____   ____   ___ _____   ____                        _
937         |_ _|  ___/ ___| |  _ \ / _ \_   _| |  _ \ _   _ _ __ ___  _ __ (_)_ __   __ _
938          | || |_ | |  _  | | | | | | || |   | | | | | | | '_ ` _ \| '_ \| | '_ \ / _` |
939          | ||  _|| |_| | | |_| | |_| || |   | |_| | |_| | | | | | | |_) | | | | | (_| |
940         |___|_|   \____| |____/ \___/ |_|   |____/ \__,_|_| |_| |_| .__/|_|_| |_|\__, |
941                                                                   |_|            |___/
942 */
943
944 static const char *get_dot_color_name(size_t col)
945 {
946         static const char *names[] = {
947                 "blue",
948                 "red",
949                 "green",
950                 "yellow",
951                 "cyan",
952                 "magenta",
953                 "orange",
954                 "chocolate",
955                 "beige",
956                 "navy",
957                 "darkgreen",
958                 "darkred",
959                 "lightPink",
960                 "chartreuse",
961                 "lightskyblue",
962                 "linen",
963                 "pink",
964                 "lightslateblue",
965                 "mintcream",
966                 "red",
967                 "darkolivegreen",
968                 "mediumblue",
969                 "mistyrose",
970                 "salmon",
971                 "darkseagreen",
972                 "mediumslateblue"
973                 "moccasin",
974                 "tomato",
975                 "forestgreen",
976                 "darkturquoise",
977                 "palevioletred"
978         };
979
980         return col < sizeof(names)/sizeof(names[0]) ? names[col] : "white";
981 }
982
983 typedef struct _co_ifg_dump_t {
984         const copy_opt_t *co;
985         unsigned flags;
986 } co_ifg_dump_t;
987
988 static void ifg_dump_graph_attr(FILE *f, void *self)
989 {
990         (void) self;
991         fprintf(f, "overlap=scale");
992 }
993
994 static int ifg_is_dump_node(void *self, ir_node *irn)
995 {
996         (void)self;
997         return !arch_irn_is(irn, ignore);
998 }
999
1000 static void ifg_dump_node_attr(FILE *f, void *self, ir_node *irn)
1001 {
1002         co_ifg_dump_t *env         = self;
1003         const arch_register_t *reg = arch_get_irn_register(irn);
1004         const arch_register_req_t *req;
1005         int limited;
1006
1007         req = arch_get_register_req(irn, BE_OUT_POS(0));
1008         limited = arch_register_req_is(req, limited);
1009
1010         if(env->flags & CO_IFG_DUMP_LABELS) {
1011                 ir_fprintf(f, "label=\"%+F", irn);
1012
1013                 if((env->flags & CO_IFG_DUMP_CONSTR) && limited) {
1014                         bitset_t *bs = bitset_alloca(env->co->cls->n_regs);
1015                         rbitset_copy_to_bitset(req->limited, bs);
1016                         ir_fprintf(f, "\\n%B", bs);
1017                 }
1018                 ir_fprintf(f, "\" ");
1019         } else {
1020                 fprintf(f, "label=\"\" shape=point " );
1021         }
1022
1023         if(env->flags & CO_IFG_DUMP_SHAPE)
1024                 fprintf(f, "shape=%s ", limited ? "diamond" : "ellipse");
1025
1026         if(env->flags & CO_IFG_DUMP_COLORS)
1027                 fprintf(f, "style=filled color=%s ", get_dot_color_name(reg->index));
1028 }
1029
1030 static void ifg_dump_at_end(FILE *file, void *self)
1031 {
1032         co_ifg_dump_t *env = self;
1033         affinity_node_t *a;
1034
1035         co_gs_foreach_aff_node(env->co, a) {
1036                 const arch_register_t *ar = arch_get_irn_register(a->irn);
1037                 unsigned aidx = get_irn_idx(a->irn);
1038                 neighb_t *n;
1039
1040                 co_gs_foreach_neighb(a, n) {
1041                         const arch_register_t *nr = arch_get_irn_register(n->irn);
1042                         unsigned nidx = get_irn_idx(n->irn);
1043
1044                         if(aidx < nidx) {
1045                                 const char *color = nr == ar ? "blue" : "red";
1046                                 fprintf(file, "\tn%d -- n%d [weight=0.01 ", aidx, nidx);
1047                                 if(env->flags & CO_IFG_DUMP_LABELS)
1048                                         fprintf(file, "label=\"%d\" ", n->costs);
1049                                 if(env->flags & CO_IFG_DUMP_COLORS)
1050                                         fprintf(file, "color=%s ", color);
1051                                 else
1052                                         fprintf(file, "style=dotted");
1053                                 fprintf(file, "];\n");
1054                         }
1055                 }
1056         }
1057 }
1058
1059
1060 static be_ifg_dump_dot_cb_t ifg_dot_cb = {
1061         ifg_is_dump_node,
1062         ifg_dump_graph_attr,
1063         ifg_dump_node_attr,
1064         NULL,
1065         NULL,
1066         ifg_dump_at_end
1067 };
1068
1069
1070
1071 void co_dump_ifg_dot(const copy_opt_t *co, FILE *f, unsigned flags)
1072 {
1073         co_ifg_dump_t cod;
1074
1075         cod.co    = co;
1076         cod.flags = flags;
1077         be_ifg_dump_dot(co->cenv->ifg, co->irg, f, &ifg_dot_cb, &cod);
1078 }
1079
1080
1081 void co_solve_park_moon(copy_opt_t *opt)
1082 {
1083         (void) opt;
1084 }
1085
1086 static int void_algo(copy_opt_t *co)
1087 {
1088         (void) co;
1089         return 0;
1090 }
1091
1092 /*
1093                 _    _                  _ _   _
1094            / \  | | __ _  ___  _ __(_) |_| |__  _ __ ___  ___
1095           / _ \ | |/ _` |/ _ \| '__| | __| '_ \| '_ ` _ \/ __|
1096          / ___ \| | (_| | (_) | |  | | |_| | | | | | | | \__ \
1097         /_/   \_\_|\__, |\___/|_|  |_|\__|_| |_|_| |_| |_|___/
1098                            |___/
1099 */
1100
1101 typedef struct {
1102         co_algo_t  *algo;
1103         const char *name;
1104         int        can_improve_existing;
1105 } co_algo_info_t;
1106
1107 static co_algo_info_t algos[] = {
1108         { void_algo,               "none",  0 },
1109         { co_solve_heuristic,      "heur1", 0 },
1110         { co_solve_heuristic_new,  "heur2", 0 },
1111 #ifdef WITH_JVM
1112         { co_solve_heuristic_java, "heur3", 0 },
1113 #else
1114         { NULL,                    "heur3", 0 },
1115 #endif
1116         { co_solve_heuristic_mst,  "heur4", 0 },
1117 #ifdef WITH_ILP
1118         { co_solve_ilp2,           "ilp",   1 },
1119 #else
1120         { NULL,                    "ilp",   1 },
1121 #endif
1122         { NULL,                    "",      0 }
1123 };
1124
1125 /*
1126     __  __       _         ____       _
1127    |  \/  | __ _(_)_ __   |  _ \ _ __(_)_   _____ _ __
1128    | |\/| |/ _` | | '_ \  | | | | '__| \ \ / / _ \ '__|
1129    | |  | | (_| | | | | | | |_| | |  | |\ V /  __/ |
1130    |_|  |_|\__,_|_|_| |_| |____/|_|  |_| \_/ \___|_|
1131
1132 */
1133
1134 static FILE *my_open(const be_chordal_env_t *env, const char *prefix, const char *suffix)
1135 {
1136         FILE *result;
1137         char buf[1024];
1138         size_t i, n;
1139         char *tu_name;
1140
1141         n = strlen(env->birg->main_env->cup_name);
1142         tu_name = XMALLOCN(char, n + 1);
1143         strcpy(tu_name, env->birg->main_env->cup_name);
1144         for (i = 0; i < n; ++i)
1145                 if (tu_name[i] == '.')
1146                         tu_name[i] = '_';
1147
1148
1149         ir_snprintf(buf, sizeof(buf), "%s%s_%F_%s%s", prefix, tu_name, env->irg, env->cls->name, suffix);
1150         xfree(tu_name);
1151         result = fopen(buf, "wt");
1152         if(result == NULL) {
1153                 panic("Couldn't open '%s' for writing.", buf);
1154         }
1155
1156         return result;
1157 }
1158
1159 void co_driver(be_chordal_env_t *cenv)
1160 {
1161         ir_timer_t          *timer = ir_timer_register("firm.be.copyopt", "runtime");
1162         co_complete_stats_t before, after;
1163         copy_opt_t          *co;
1164         co_algo_t           *algo_func;
1165         int                 was_optimal = 0;
1166
1167         if (algo >= CO_ALGO_LAST)
1168                 return;
1169
1170         be_liveness_assure_chk(be_get_birg_liveness(cenv->birg));
1171
1172         co = new_copy_opt(cenv, cost_func);
1173         co_build_ou_structure(co);
1174         co_build_graph_structure(co);
1175
1176         co_complete_stats(co, &before);
1177
1178         be_stat_ev_ull("co_aff_nodes",    before.aff_nodes);
1179         be_stat_ev_ull("co_aff_edges",    before.aff_edges);
1180         be_stat_ev_ull("co_max_costs",    before.max_costs);
1181         be_stat_ev_ull("co_inevit_costs", before.inevit_costs);
1182         be_stat_ev_ull("co_aff_int",      before.aff_int);
1183
1184         be_stat_ev_ull("co_init_costs",   before.costs);
1185         be_stat_ev_ull("co_init_unsat",   before.unsatisfied_edges);
1186
1187         if (dump_flags & DUMP_BEFORE) {
1188                 FILE *f = my_open(cenv, "", "-before.dot");
1189                 co_dump_ifg_dot(co, f, style_flags);
1190                 fclose(f);
1191         }
1192
1193         /* if the algo can improve results, provide an initial solution with heur3 */
1194         if (improve && algos[algo].can_improve_existing) {
1195                 co_complete_stats_t stats;
1196
1197                 /* produce a heuristic solution */
1198 #ifdef WITH_JVM
1199                 co_solve_heuristic_java(co);
1200 #else
1201                 co_solve_heuristic(co);
1202 #endif /* WITH_JVM */
1203
1204                 /* do the stats and provide the current costs */
1205                 co_complete_stats(co, &stats);
1206                 be_stat_ev_ull("co_prepare_costs", stats.costs);
1207         }
1208
1209 #ifdef WITH_JVM
1210         /* start the JVM here so that it does not tamper the timing. */
1211         if (algo == CO_ALGO_HEUR3)
1212                 be_java_coal_start_jvm();
1213 #endif /* WITH_JVM */
1214
1215         algo_func = algos[algo].algo;
1216
1217         /* perform actual copy minimization */
1218         ir_timer_reset_and_start(timer);
1219         was_optimal = algo_func(co);
1220         ir_timer_stop(timer);
1221
1222         be_stat_ev("co_time", ir_timer_elapsed_msec(timer));
1223         be_stat_ev_ull("co_optimal", was_optimal);
1224
1225         if (dump_flags & DUMP_AFTER) {
1226                 FILE *f = my_open(cenv, "", "-after.dot");
1227                 co_dump_ifg_dot(co, f, style_flags);
1228                 fclose(f);
1229         }
1230
1231         co_complete_stats(co, &after);
1232
1233         if (do_stats) {
1234                 ulong64 optimizable_costs = after.max_costs - after.inevit_costs;
1235                 ulong64 evitable          = after.costs     - after.inevit_costs;
1236
1237                 ir_printf("%30F ", cenv->irg);
1238                 printf("%10s %10" ULL_FMT "%10" ULL_FMT "%10" ULL_FMT, cenv->cls->name, after.max_costs, before.costs, after.inevit_costs);
1239
1240                 if(optimizable_costs > 0)
1241                         printf("%10" ULL_FMT " %5.2f\n", after.costs, (evitable * 100.0) / optimizable_costs);
1242                 else
1243                         printf("%10" ULL_FMT " %5s\n", after.costs, "-");
1244         }
1245
1246         /* Dump the interference graph in Appel's format. */
1247         if (dump_flags & DUMP_APPEL) {
1248                 FILE *f = my_open(cenv, "", ".apl");
1249                 fprintf(f, "# %lld %lld\n", after.costs, after.unsatisfied_edges);
1250                 co_dump_appel_graph(co, f);
1251                 fclose(f);
1252         }
1253
1254         be_stat_ev_ull("co_after_costs", after.costs);
1255         be_stat_ev_ull("co_after_unsat", after.unsatisfied_edges);
1256
1257         co_free_graph_structure(co);
1258         co_free_ou_structure(co);
1259         free_copy_opt(co);
1260 }